diff --git "a/checkpoint-2668/trainer_state.json" "b/checkpoint-2668/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-2668/trainer_state.json" @@ -0,0 +1,38383 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9992509363295881, + "global_step": 2668, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.8518518518518518e-05, + "loss": 11.3968, + "theoretical_loss": 10.87642657795271, + "tokens_seen": 1048576 + }, + { + "epoch": 0.0, + "learning_rate": 3.7037037037037037e-05, + "loss": 11.3956, + "theoretical_loss": 9.382678282406216, + "tokens_seen": 2097152 + }, + { + "epoch": 0.0, + "learning_rate": 5.555555555555555e-05, + "loss": 10.5571, + "theoretical_loss": 8.634232609231233, + "tokens_seen": 3145728 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.4819035232067108, + "objective/train/docs_used": 13098, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 9.65994930267334, + "objective/train/original_loss": 9.659950256347656, + "objective/train/theoretical_loss": 8.563481156106828, + "objective/train/tokens_used": 23736800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2415286898612976, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494179725646973, + "objective/train/weighted_lm_loss": 10.137272834777832, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9525310397148132, + "theoretical_loss": 8.563481156106828, + "tokens_seen": 3276800 + }, + { + "epoch": 0.0, + "learning_rate": 7.407407407407407e-05, + "loss": 10.1633, + "theoretical_loss": 8.152440604135377, + "tokens_seen": 4194304 + }, + { + "epoch": 0.0, + "learning_rate": 9.259259259259259e-05, + "loss": 10.086, + "theoretical_loss": 7.804563746449924, + "tokens_seen": 5242880 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001111111111111111, + "loss": 9.6408, + "theoretical_loss": 7.536027470795679, + "tokens_seen": 6291456 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.45301443338394165, + "objective/train/docs_used": 15224, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 9.799736976623535, + "objective/train/original_loss": 9.799737930297852, + "objective/train/theoretical_loss": 7.477757209543791, + "objective/train/tokens_used": 27013600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23901385068893433, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.046515703201294, + "objective/train/weighted_lm_loss": 10.25350570678711, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9514407515525818, + "theoretical_loss": 7.477757209543791, + "tokens_seen": 6553600 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012962962962962963, + "loss": 9.8744, + "theoretical_loss": 7.319437165569436, + "tokens_seen": 7340032 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014814814814814815, + "loss": 9.604, + "theoretical_loss": 7.139227903207399, + "tokens_seen": 8388608 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016666666666666666, + "loss": 9.5326, + "theoretical_loss": 6.985769514638539, + "tokens_seen": 9437184 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.47840288281440735, + "objective/train/docs_used": 16449, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 9.037789344787598, + "objective/train/original_loss": 9.037790298461914, + "objective/train/theoretical_loss": 6.9337529803906595, + "objective/train/tokens_used": 30290400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23556047677993774, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490374565124512, + "objective/train/weighted_lm_loss": 9.48002815246582, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9567553997039795, + "theoretical_loss": 6.9337529803906595, + "tokens_seen": 9830400 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018518518518518518, + "loss": 9.4931, + "theoretical_loss": 6.85271964810239, + "tokens_seen": 10485760 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002037037037037037, + "loss": 9.2078, + "theoretical_loss": 6.735696451044834, + "tokens_seen": 11534336 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002222222222222222, + "loss": 9.1417, + "theoretical_loss": 6.631555583663063, + "tokens_seen": 12582912 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.48848089575767517, + "objective/train/docs_used": 18197, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 8.314473152160645, + "objective/train/original_loss": 8.314474105834961, + "objective/train/theoretical_loss": 6.583564719922174, + "objective/train/tokens_used": 33567200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2417714148759842, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500770807266235, + "objective/train/weighted_lm_loss": 8.729887962341309, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9551335573196411, + "theoretical_loss": 6.583564719922174, + "tokens_seen": 13107200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00024074074074074072, + "loss": 8.9637, + "theoretical_loss": 6.537970167599786, + "tokens_seen": 13631488 + }, + { + "epoch": 0.01, + "learning_rate": 0.00025925925925925926, + "loss": 8.8478, + "theoretical_loss": 6.453173753851489, + "tokens_seen": 14680064 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002777777777777778, + "loss": 8.6147, + "theoretical_loss": 6.3757961938167265, + "tokens_seen": 15728640 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.48041555285453796, + "objective/train/docs_used": 19928, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 7.850920677185059, + "objective/train/original_loss": 7.850921154022217, + "objective/train/theoretical_loss": 6.330712056611843, + "objective/train/tokens_used": 36844000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23706857860088348, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492464303970337, + "objective/train/weighted_lm_loss": 8.236246109008789, + "objective/train/weights_max": 1.0512189865112305, + "objective/train/weights_min": 0.9536896347999573, + "theoretical_loss": 6.330712056611843, + "tokens_seen": 16384000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002962962962962963, + "loss": 8.4719, + "theoretical_loss": 6.304755024141699, + "tokens_seen": 16777216 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003148148148148148, + "loss": 8.2341, + "theoretical_loss": 6.239181372596676, + "tokens_seen": 17825792 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003333333333333333, + "loss": 8.0206, + "theoretical_loss": 6.17836807379203, + "tokens_seen": 18874368 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.4757058024406433, + "objective/train/docs_used": 21444, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 7.8217854499816895, + "objective/train/original_loss": 7.821784496307373, + "objective/train/theoretical_loss": 6.135527723436086, + "objective/train/tokens_used": 40120800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23544126749038696, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487672090530396, + "objective/train/weighted_lm_loss": 8.202380180358887, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9515462517738342, + "theoretical_loss": 6.135527723436086, + "tokens_seen": 19660800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003518518518518519, + "loss": 7.9621, + "theoretical_loss": 6.121732500517281, + "tokens_seen": 19922944 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037037037037037035, + "loss": 7.7465, + "theoretical_loss": 6.068789398864208, + "tokens_seen": 20971520 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003888888888888889, + "loss": 7.6596, + "theoretical_loss": 6.019130679617959, + "tokens_seen": 22020096 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.4673197567462921, + "objective/train/docs_used": 23342, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.7955641746521, + "objective/train/original_loss": 6.795564651489258, + "objective/train/theoretical_loss": 5.978100075365368, + "objective/train/tokens_used": 43397600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22965674102306366, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0478991270065308, + "objective/train/weighted_lm_loss": 7.11885404586792, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9708001017570496, + "theoretical_loss": 5.978100075365368, + "tokens_seen": 22937600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004074074074074074, + "loss": 7.534, + "theoretical_loss": 5.972410144955672, + "tokens_seen": 23068672 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042592592592592595, + "loss": 7.4855, + "theoretical_loss": 5.928331780102846, + "tokens_seen": 24117248 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004444444444444444, + "loss": 7.5487, + "theoretical_loss": 5.886640662049593, + "tokens_seen": 25165824 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.47823473811149597, + "objective/train/docs_used": 25475, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 7.412741184234619, + "objective/train/original_loss": 7.412740707397461, + "objective/train/theoretical_loss": 5.847115817761683, + "objective/train/tokens_used": 46674400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23657572269439697, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490257740020752, + "objective/train/weighted_lm_loss": 7.774959564208984, + "objective/train/weights_max": 1.0512189865112305, + "objective/train/weights_min": 0.9553595781326294, + "theoretical_loss": 5.847115817761683, + "tokens_seen": 26214400 + }, + { + "epoch": 0.01, + "learning_rate": 0.000462962962962963, + "loss": 7.4963, + "theoretical_loss": 5.847115817761683, + "tokens_seen": 26214400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00048148148148148144, + "loss": 7.3715, + "theoretical_loss": 5.809564554032628, + "tokens_seen": 27262976 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005, + "loss": 7.5494, + "theoretical_loss": 5.773817911814618, + "tokens_seen": 28311552 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004998108210367007, + "loss": 7.3456, + "theoretical_loss": 5.739726989373027, + "tokens_seen": 29360128 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.4513802230358124, + "objective/train/docs_used": 27675, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.641174793243408, + "objective/train/original_loss": 6.641175270080566, + "objective/train/theoretical_loss": 5.735575307377884, + "objective/train/tokens_used": 49951200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2184389978647232, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0462480783462524, + "objective/train/weighted_lm_loss": 6.950745105743408, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9532368183135986, + "theoretical_loss": 5.735575307377884, + "tokens_seen": 29491200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004996216420734014, + "loss": 7.4153, + "theoretical_loss": 5.707159943639638, + "tokens_seen": 30408704 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004994324631101022, + "loss": 7.4387, + "theoretical_loss": 5.6759995259903135, + "tokens_seen": 31457280 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004992432841468029, + "loss": 7.3125, + "theoretical_loss": 5.646141042853927, + "tokens_seen": 32505856 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.4938547611236572, + "objective/train/docs_used": 29692, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.995054244995117, + "objective/train/original_loss": 6.995054244995117, + "objective/train/theoretical_loss": 5.638868635567113, + "objective/train/tokens_used": 53228000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24502605199813843, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506311655044556, + "objective/train/weighted_lm_loss": 7.349433898925781, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9657517075538635, + "theoretical_loss": 5.638868635567113, + "tokens_seen": 32768000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004990541051835036, + "loss": 7.3289, + "theoretical_loss": 5.61749065678978, + "tokens_seen": 33554432 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004988649262202043, + "loss": 7.2839, + "theoretical_loss": 5.589963962496837, + "tokens_seen": 34603008 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004986757472569051, + "loss": 7.2524, + "theoretical_loss": 5.563484786407864, + "tokens_seen": 35651584 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.48527762293815613, + "objective/train/docs_used": 31846, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 7.310849666595459, + "objective/train/original_loss": 7.310849189758301, + "objective/train/theoretical_loss": 5.553810873340668, + "objective/train/tokens_used": 56504800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24185144901275635, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497572422027588, + "objective/train/weighted_lm_loss": 7.675657272338867, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9533452987670898, + "theoretical_loss": 5.553810873340668, + "tokens_seen": 36044800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004984865682936058, + "loss": 7.1968, + "theoretical_loss": 5.537984169317745, + "tokens_seen": 36700160 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004982973893303065, + "loss": 7.129, + "theoretical_loss": 5.513399499781949, + "tokens_seen": 37748736 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004981082103670072, + "loss": 7.1462, + "theoretical_loss": 5.4896737724343065, + "tokens_seen": 38797312 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.4853717088699341, + "objective/train/docs_used": 33055, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.262423038482666, + "objective/train/original_loss": 6.262422561645508, + "objective/train/theoretical_loss": 5.478116572052198, + "objective/train/tokens_used": 59781600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24034540355205536, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497589111328125, + "objective/train/weighted_lm_loss": 6.57367467880249, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9534735083580017, + "theoretical_loss": 5.478116572052198, + "tokens_seen": 39321600 + }, + { + "epoch": 0.01, + "learning_rate": 0.000497919031403708, + "loss": 7.0583, + "theoretical_loss": 5.466754950375066, + "tokens_seen": 39845888 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004977298524404087, + "loss": 6.9816, + "theoretical_loss": 5.444595414709691, + "tokens_seen": 40894464 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004975406734771094, + "loss": 7.0311, + "theoretical_loss": 5.423151487427312, + "tokens_seen": 41943040 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.48702144622802734, + "objective/train/docs_used": 34814, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.590912818908691, + "objective/train/original_loss": 6.590912818908691, + "objective/train/theoretical_loss": 5.410094451075121, + "objective/train/tokens_used": 63058400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24035364389419556, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499237775802612, + "objective/train/weighted_lm_loss": 6.920047760009766, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9549703001976013, + "theoretical_loss": 5.410094451075121, + "tokens_seen": 42598400 + }, + { + "epoch": 0.02, + "learning_rate": 0.00049735149451381, + "loss": 7.0719, + "theoretical_loss": 5.402383016282556, + "tokens_seen": 42991616 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004971623155505107, + "loss": 6.9474, + "theoretical_loss": 5.382253012327088, + "tokens_seen": 44040192 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004969731365872115, + "loss": 6.9685, + "theoretical_loss": 5.362727332334602, + "tokens_seen": 45088768 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.49159982800483704, + "objective/train/docs_used": 36830, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.507728099822998, + "objective/train/original_loss": 6.50772762298584, + "objective/train/theoretical_loss": 5.348460575231594, + "objective/train/tokens_used": 66335200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2427477091550827, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503939390182495, + "objective/train/weighted_lm_loss": 6.835729122161865, + "objective/train/weights_max": 1.0512152910232544, + "objective/train/weights_min": 0.9736581444740295, + "theoretical_loss": 5.348460575231594, + "tokens_seen": 45875200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004967839576239122, + "loss": 6.8813, + "theoretical_loss": 5.343774399657292, + "tokens_seen": 46137344 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004965947786606129, + "loss": 6.7701, + "theoretical_loss": 5.325364958105951, + "tokens_seen": 47185920 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004964055996973137, + "loss": 6.8432, + "theoretical_loss": 5.307471854308661, + "tokens_seen": 48234496 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.4919637143611908, + "objective/train/docs_used": 38907, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.092850685119629, + "objective/train/original_loss": 6.092850208282471, + "objective/train/theoretical_loss": 5.292219058433327, + "objective/train/tokens_used": 69612000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2443958818912506, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504387617111206, + "objective/train/weighted_lm_loss": 6.401121616363525, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9526038765907288, + "theoretical_loss": 5.292219058433327, + "tokens_seen": 49152000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004962164207340144, + "loss": 6.7243, + "theoretical_loss": 5.290069844712654, + "tokens_seen": 49283072 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004960272417707151, + "loss": 6.8188, + "theoretical_loss": 5.273135423980159, + "tokens_seen": 50331648 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004958380628074158, + "loss": 6.7243, + "theoretical_loss": 5.256646672015468, + "tokens_seen": 51380224 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.4699273407459259, + "objective/train/docs_used": 40119, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.565382480621338, + "objective/train/original_loss": 6.56538200378418, + "objective/train/theoretical_loss": 5.240583117265738, + "objective/train/tokens_used": 72888800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2322714626789093, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481733083724976, + "objective/train/weighted_lm_loss": 6.87912654876709, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9521334767341614, + "theoretical_loss": 5.240583117265738, + "tokens_seen": 52428800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004956488838441165, + "loss": 6.7436, + "theoretical_loss": 5.240583117265738, + "tokens_seen": 52428800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004954597048808173, + "loss": 6.7854, + "theoretical_loss": 5.224925614276991, + "tokens_seen": 53477376 + }, + { + "epoch": 0.02, + "learning_rate": 0.000495270525917518, + "loss": 6.7126, + "theoretical_loss": 5.209656233771442, + "tokens_seen": 54525952 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004950813469542187, + "loss": 6.7657, + "theoretical_loss": 5.194758163752068, + "tokens_seen": 55574528 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.48516738414764404, + "objective/train/docs_used": 42175, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.86571741104126, + "objective/train/original_loss": 5.865716934204102, + "objective/train/theoretical_loss": 5.192921216021549, + "objective/train/tokens_used": 76165600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24054694175720215, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497394800186157, + "objective/train/weighted_lm_loss": 6.157922744750977, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9579119682312012, + "theoretical_loss": 5.192921216021549, + "tokens_seen": 55705600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004948921679909194, + "loss": 6.667, + "theoretical_loss": 5.180215620343211, + "tokens_seen": 56623104 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004947029890276201, + "loss": 6.726, + "theoretical_loss": 5.166013767248007, + "tokens_seen": 57671680 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004945138100643209, + "loss": 6.6426, + "theoretical_loss": 5.152138642849951, + "tokens_seen": 58720256 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.45703786611557007, + "objective/train/docs_used": 44036, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.240392208099365, + "objective/train/original_loss": 6.240392684936523, + "objective/train/theoretical_loss": 5.148719354852201, + "objective/train/tokens_used": 79442400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24004234373569489, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0469226837158203, + "objective/train/weighted_lm_loss": 6.5345845222473145, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9527196884155273, + "theoretical_loss": 5.148719354852201, + "tokens_seen": 58982400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004943246311010216, + "loss": 6.6952, + "theoretical_loss": 5.138577094110906, + "tokens_seen": 59768832 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004941354521377223, + "loss": 6.6242, + "theoretical_loss": 5.12531671652499, + "tokens_seen": 60817408 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004939462731744231, + "loss": 6.6385, + "theoretical_loss": 5.112345799479678, + "tokens_seen": 61865984 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.4917111396789551, + "objective/train/docs_used": 45869, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.257774353027344, + "objective/train/original_loss": 6.257774353027344, + "objective/train/theoretical_loss": 5.107554053900861, + "objective/train/tokens_used": 82719200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2443944662809372, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504134893417358, + "objective/train/weighted_lm_loss": 6.572638511657715, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.964363157749176, + "theoretical_loss": 5.107554053900861, + "tokens_seen": 62259200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004937570942111238, + "loss": 6.5263, + "theoretical_loss": 5.0996532764547, + "tokens_seen": 62914560 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004935679152478245, + "loss": 6.5435, + "theoretical_loss": 5.087228679557634, + "tokens_seen": 63963136 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004933787362845251, + "loss": 6.5413, + "theoretical_loss": 5.075062097954335, + "tokens_seen": 65011712 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.47832614183425903, + "objective/train/docs_used": 47778, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.022909641265869, + "objective/train/original_loss": 6.022909164428711, + "objective/train/theoretical_loss": 5.069072608639006, + "objective/train/tokens_used": 85996000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2356715351343155, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490304231643677, + "objective/train/weighted_lm_loss": 6.317652702331543, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.952083170413971, + "theoretical_loss": 5.069072608639006, + "tokens_seen": 65536000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004931895573212258, + "loss": 6.4814, + "theoretical_loss": 5.063144139803664, + "tokens_seen": 66060288 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004930003783579266, + "loss": 6.5092, + "theoretical_loss": 5.051465897350656, + "tokens_seen": 67108864 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004928111993946273, + "loss": 6.4835, + "theoretical_loss": 5.040018914871285, + "tokens_seen": 68157440 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.4900747239589691, + "objective/train/docs_used": 49496, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.184676170349121, + "objective/train/original_loss": 6.184676647186279, + "objective/train/theoretical_loss": 5.032978401333766, + "objective/train/tokens_used": 89272800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24198560416698456, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502376556396484, + "objective/train/weighted_lm_loss": 6.495528697967529, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9738534092903137, + "theoretical_loss": 5.032978401333766, + "tokens_seen": 68812800 + }, + { + "epoch": 0.02, + "learning_rate": 0.000492622020431328, + "loss": 6.5187, + "theoretical_loss": 5.028795159195919, + "tokens_seen": 69206016 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004924328414680287, + "loss": 6.431, + "theoretical_loss": 5.01778699256848, + "tokens_seen": 70254592 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004922436625047294, + "loss": 6.4495, + "theoretical_loss": 5.006987147624395, + "tokens_seen": 71303168 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.48679879307746887, + "objective/train/docs_used": 51476, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.056151390075684, + "objective/train/original_loss": 6.056151390075684, + "objective/train/theoretical_loss": 4.999019799720424, + "objective/train/tokens_used": 92549600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23905406892299652, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498950481414795, + "objective/train/weighted_lm_loss": 6.357154846191406, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9834248423576355, + "theoretical_loss": 4.999019799720424, + "tokens_seen": 72089600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004920544835414302, + "loss": 6.4097, + "theoretical_loss": 4.996388704293487, + "tokens_seen": 72351744 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004918653045781309, + "loss": 6.3944, + "theoretical_loss": 4.985985068454193, + "tokens_seen": 73400320 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004916761256148316, + "loss": 6.3967, + "theoretical_loss": 4.9757699521834, + "tokens_seen": 74448896 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.48649123311042786, + "objective/train/docs_used": 53412, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.1873979568481445, + "objective/train/original_loss": 6.187397480010986, + "objective/train/theoretical_loss": 4.966981646847723, + "objective/train/tokens_used": 95826400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23994770646095276, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498689413070679, + "objective/train/weighted_lm_loss": 6.4954962730407715, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9554786682128906, + "theoretical_loss": 4.966981646847723, + "tokens_seen": 75366400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004914869466515324, + "loss": 6.4082, + "theoretical_loss": 4.96573735546202, + "tokens_seen": 75497472 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004912977676882331, + "loss": 6.3538, + "theoretical_loss": 4.955881549210428, + "tokens_seen": 76546048 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004911085887249338, + "loss": 6.2587, + "theoretical_loss": 4.946197059540362, + "tokens_seen": 77594624 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.4663962125778198, + "objective/train/docs_used": 55300, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.6434221267700195, + "objective/train/original_loss": 5.6434221267700195, + "objective/train/theoretical_loss": 4.936678653120895, + "objective/train/tokens_used": 99103200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23964135348796844, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0478569269180298, + "objective/train/weighted_lm_loss": 5.911611557006836, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.951373279094696, + "theoretical_loss": 4.936678653120895, + "tokens_seen": 78643200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004909194097616345, + "loss": 6.2099, + "theoretical_loss": 4.936678653120895, + "tokens_seen": 78643200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004907302307983352, + "loss": 6.3581, + "theoretical_loss": 4.927321323566017, + "tokens_seen": 79691776 + }, + { + "epoch": 0.03, + "learning_rate": 0.000490541051835036, + "loss": 6.2595, + "theoretical_loss": 4.918120278760069, + "tokens_seen": 80740352 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004903518728717367, + "loss": 6.2955, + "theoretical_loss": 4.909070929045194, + "tokens_seen": 81788928 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.46273210644721985, + "objective/train/docs_used": 57273, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.127193450927734, + "objective/train/original_loss": 6.127194404602051, + "objective/train/theoretical_loss": 4.907950205325841, + "objective/train/tokens_used": 102380000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23514951765537262, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0474683046340942, + "objective/train/weighted_lm_loss": 6.427692413330078, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9521082639694214, + "theoretical_loss": 4.907950205325841, + "tokens_seen": 81920000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004901626939084374, + "loss": 6.2293, + "theoretical_loss": 4.90016887620194, + "tokens_seen": 82837504 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004899735149451381, + "loss": 6.2417, + "theoretical_loss": 4.891409903160486, + "tokens_seen": 83886080 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004897843359818388, + "loss": 6.1855, + "theoretical_loss": 4.882789964385566, + "tokens_seen": 84934656 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.47956690192222595, + "objective/train/docs_used": 58950, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.768993377685547, + "objective/train/original_loss": 5.768993377685547, + "objective/train/theoretical_loss": 4.880656245308686, + "objective/train/tokens_used": 105656800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23697242140769958, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491610765457153, + "objective/train/weighted_lm_loss": 6.05217981338501, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9515382647514343, + "theoretical_loss": 4.880656245308686, + "tokens_seen": 85196800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004895951570185396, + "loss": 6.2568, + "theoretical_loss": 4.874305176883285, + "tokens_seen": 85983232 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004894059780552403, + "loss": 6.1378, + "theoretical_loss": 4.865951811782555, + "tokens_seen": 87031808 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004892167990919411, + "loss": 6.1333, + "theoretical_loss": 4.857726286448001, + "tokens_seen": 88080384 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.4823387563228607, + "objective/train/docs_used": 60915, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.946232795715332, + "objective/train/original_loss": 5.946232795715332, + "objective/train/theoretical_loss": 4.854673965977539, + "objective/train/tokens_used": 108933600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23662100732326508, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494364500045776, + "objective/train/weighted_lm_loss": 6.240575313568115, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9716301560401917, + "theoretical_loss": 4.854673965977539, + "tokens_seen": 88473600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004890276201286417, + "loss": 6.0885, + "theoretical_loss": 4.849625157084915, + "tokens_seen": 89128960 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004888384411653424, + "loss": 6.0157, + "theoretical_loss": 4.8416451118001484, + "tokens_seen": 90177536 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004886492622020431, + "loss": 6.0435, + "theoretical_loss": 4.833782964085925, + "tokens_seen": 91226112 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.4862677752971649, + "objective/train/docs_used": 62413, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.789903163909912, + "objective/train/original_loss": 5.789902687072754, + "objective/train/theoretical_loss": 4.829895138804573, + "objective/train/tokens_used": 112210400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24461686611175537, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498701333999634, + "objective/train/weighted_lm_loss": 6.078371524810791, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9515182375907898, + "theoretical_loss": 4.829895138804573, + "tokens_seen": 91750400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004884600832387438, + "loss": 6.0701, + "theoretical_loss": 4.826035646696238, + "tokens_seen": 92274688 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004882709042754446, + "loss": 6.0581, + "theoretical_loss": 4.8184002058880395, + "tokens_seen": 93323264 + }, + { + "epoch": 0.03, + "learning_rate": 0.00048808172531214527, + "loss": 6.0027, + "theoretical_loss": 4.810873796001641, + "tokens_seen": 94371840 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.4866400361061096, + "objective/train/docs_used": 64216, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.136915683746338, + "objective/train/original_loss": 6.136915683746338, + "objective/train/theoretical_loss": 4.80622393427368, + "objective/train/tokens_used": 115487200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24255988001823425, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498968362808228, + "objective/train/weighted_lm_loss": 6.4431023597717285, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.951479434967041, + "theoretical_loss": 4.80622393427368, + "tokens_seen": 95027200 + }, + { + "epoch": 0.03, + "learning_rate": 0.000487892546348846, + "loss": 6.0778, + "theoretical_loss": 4.803453674356847, + "tokens_seen": 95420416 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004877033673855467, + "loss": 6.023, + "theoretical_loss": 4.796137196443157, + "tokens_seen": 96468992 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004875141884222474, + "loss": 6.0011, + "theoretical_loss": 4.788921811384128, + "tokens_seen": 97517568 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.48324474692344666, + "objective/train/docs_used": 66562, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.525176525115967, + "objective/train/original_loss": 5.525176525115967, + "objective/train/theoretical_loss": 4.783575130772016, + "objective/train/tokens_used": 118764000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2386062890291214, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495374202728271, + "objective/train/weighted_lm_loss": 5.799572944641113, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9542084336280823, + "theoretical_loss": 4.783575130772016, + "tokens_seen": 98304000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004873250094589482, + "loss": 5.9655, + "theoretical_loss": 4.781805057657483, + "tokens_seen": 98566144 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004871358304956489, + "loss": 5.9567, + "theoretical_loss": 4.774784559054009, + "tokens_seen": 99614720 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048694665153234965, + "loss": 6.0355, + "theoretical_loss": 4.76785802085957, + "tokens_seen": 100663296 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.466911256313324, + "objective/train/docs_used": 67858, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.060521125793457, + "objective/train/original_loss": 5.060521125793457, + "objective/train/theoretical_loss": 4.761872632268167, + "objective/train/tokens_used": 122040800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2336144745349884, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0478785037994385, + "objective/train/weighted_lm_loss": 5.305319786071777, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9513893127441406, + "theoretical_loss": 4.761872632268167, + "tokens_seen": 101580800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048675747256905034, + "loss": 5.966, + "theoretical_loss": 4.761023226245718, + "tokens_seen": 101711872 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048656829360575103, + "loss": 5.9966, + "theoretical_loss": 4.754278032855497, + "tokens_seen": 102760448 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004863791146424518, + "loss": 6.0478, + "theoretical_loss": 4.747620369572012, + "tokens_seen": 103809024 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.4873834550380707, + "objective/train/docs_used": 69339, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.778015613555908, + "objective/train/original_loss": 5.778016090393066, + "objective/train/theoretical_loss": 4.741048233458233, + "objective/train/tokens_used": 125317600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23935331404209137, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499550104141235, + "objective/train/weighted_lm_loss": 6.066779613494873, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9623346328735352, + "theoretical_loss": 4.741048233458233, + "tokens_seen": 104857600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004861899356791525, + "loss": 5.9699, + "theoretical_loss": 4.741048233458233, + "tokens_seen": 104857600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004860007567158532, + "loss": 5.9896, + "theoretical_loss": 4.734559686857368, + "tokens_seen": 105906176 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004858115777525539, + "loss": 6.0698, + "theoretical_loss": 4.728152854643862, + "tokens_seen": 106954752 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004856223987892546, + "loss": 6.0114, + "theoretical_loss": 4.721825921615813, + "tokens_seen": 108003328 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.48563089966773987, + "objective/train/docs_used": 71265, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.9209771156311035, + "objective/train/original_loss": 5.920976638793945, + "objective/train/theoretical_loss": 4.721040584744811, + "objective/train/tokens_used": 128594400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2397887110710144, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497820377349854, + "objective/train/weighted_lm_loss": 6.215074062347412, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9557585120201111, + "theoretical_loss": 4.721040584744811, + "tokens_seen": 108134400 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048543321982595536, + "loss": 5.974, + "theoretical_loss": 4.71557713002025, + "tokens_seen": 109051904 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048524404086265605, + "loss": 5.9541, + "theoretical_loss": 4.709404777203279, + "tokens_seen": 110100480 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004850548618993568, + "loss": 5.9167, + "theoretical_loss": 4.703307213377671, + "tokens_seen": 111149056 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.48972105979919434, + "objective/train/docs_used": 72747, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.520245552062988, + "objective/train/original_loss": 5.520245552062988, + "objective/train/theoretical_loss": 4.701794319727625, + "objective/train/tokens_used": 131871200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24246089160442352, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502045154571533, + "objective/train/weighted_lm_loss": 5.797506809234619, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9541246891021729, + "theoretical_loss": 4.701794319727625, + "tokens_seen": 111411200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048486568293605755, + "loss": 5.947, + "theoretical_loss": 4.697282839501012, + "tokens_seen": 112197632 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048467650397275824, + "loss": 5.9188, + "theoretical_loss": 4.691330105257913, + "tokens_seen": 113246208 + }, + { + "epoch": 0.04, + "learning_rate": 0.000484487325009459, + "loss": 5.8738, + "theoretical_loss": 4.685447507140298, + "tokens_seen": 114294784 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.4893937110900879, + "objective/train/docs_used": 74963, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.727111339569092, + "objective/train/original_loss": 5.727111339569092, + "objective/train/theoretical_loss": 4.683259315731689, + "objective/train/tokens_used": 135148000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24246010184288025, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050171971321106, + "objective/train/weighted_lm_loss": 6.014029026031494, + "objective/train/weights_max": 1.0512195825576782, + "objective/train/weights_min": 0.9718445539474487, + "theoretical_loss": 4.683259315731689, + "tokens_seen": 114688000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004842981460461597, + "loss": 5.8587, + "theoretical_loss": 4.679633586620149, + "tokens_seen": 115343360 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004841089670828604, + "loss": 5.857, + "theoretical_loss": 4.673886928409454, + "tokens_seen": 116391936 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004839197881195611, + "loss": 5.8442, + "theoretical_loss": 4.668206158802439, + "tokens_seen": 117440512 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.48216262459754944, + "objective/train/docs_used": 77191, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.769596099853516, + "objective/train/original_loss": 5.769596099853516, + "objective/train/theoretical_loss": 4.665390063922041, + "objective/train/tokens_used": 138424800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23990829288959503, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494357347488403, + "objective/train/weighted_lm_loss": 6.053895950317383, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9527034759521484, + "theoretical_loss": 4.665390063922041, + "tokens_seen": 117964800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004837306091562618, + "loss": 5.8264, + "theoretical_loss": 4.662589944095533, + "tokens_seen": 118489088 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048354143019296256, + "loss": 5.8773, + "theoretical_loss": 4.657036989080726, + "tokens_seen": 119537664 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048335225122966326, + "loss": 5.8006, + "theoretical_loss": 4.651546035608336, + "tokens_seen": 120586240 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.4687286913394928, + "objective/train/docs_used": 78964, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.389880657196045, + "objective/train/original_loss": 5.389880657196045, + "objective/train/theoretical_loss": 4.648145130215498, + "objective/train/tokens_used": 141701600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23362566530704498, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0480601787567139, + "objective/train/weighted_lm_loss": 5.650588035583496, + "objective/train/weights_max": 1.0512198209762573, + "objective/train/weights_min": 0.9520321488380432, + "theoretical_loss": 4.648145130215498, + "tokens_seen": 121241600 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048316307226636395, + "loss": 5.7878, + "theoretical_loss": 4.646115861215389, + "tokens_seen": 121634816 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004829738933030647, + "loss": 5.7865, + "theoretical_loss": 4.640745277816107, + "tokens_seen": 122683392 + }, + { + "epoch": 0.04, + "learning_rate": 0.00048278471433976544, + "loss": 5.8501, + "theoretical_loss": 4.635433130451148, + "tokens_seen": 123731968 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.4775085151195526, + "objective/train/docs_used": 81017, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.654141426086426, + "objective/train/original_loss": 5.654140472412109, + "objective/train/theoretical_loss": 4.631486691835402, + "objective/train/tokens_used": 144978400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23175130784511566, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489286184310913, + "objective/train/weighted_lm_loss": 5.932117938995361, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9547845721244812, + "theoretical_loss": 4.631486691835402, + "tokens_seen": 124518400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004825955353764662, + "loss": 5.8575, + "theoretical_loss": 4.630178296092535, + "tokens_seen": 124780544 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004824063564131669, + "loss": 5.8373, + "theoretical_loss": 4.624979682501314, + "tokens_seen": 125829120 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004822171774498676, + "loss": 5.7121, + "theoretical_loss": 4.619836227135212, + "tokens_seen": 126877696 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.4863889813423157, + "objective/train/docs_used": 82924, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.5550537109375, + "objective/train/original_loss": 5.5550537109375, + "objective/train/theoretical_loss": 4.615380137211477, + "objective/train/tokens_used": 148255200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23805738985538483, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498487949371338, + "objective/train/weighted_lm_loss": 5.832069396972656, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 1.0026726722717285, + "theoretical_loss": 4.615380137211477, + "tokens_seen": 127795200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004820279984865683, + "loss": 5.8039, + "theoretical_loss": 4.6147468961037195, + "tokens_seen": 127926272 + }, + { + "epoch": 0.05, + "learning_rate": 0.000481838819523269, + "loss": 5.8314, + "theoretical_loss": 4.609710683168146, + "tokens_seen": 128974848 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004816496405599697, + "loss": 5.7879, + "theoretical_loss": 4.604726608784391, + "tokens_seen": 130023424 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.48750680685043335, + "objective/train/docs_used": 84906, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.394794940948486, + "objective/train/original_loss": 5.39479398727417, + "objective/train/theoretical_loss": 4.599793719186264, + "objective/train/tokens_used": 151532000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24220028519630432, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049981713294983, + "objective/train/weighted_lm_loss": 5.664059162139893, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.952854573726654, + "theoretical_loss": 4.599793719186264, + "tokens_seen": 131072000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00048146046159667046, + "loss": 5.6343, + "theoretical_loss": 4.599793719186264, + "tokens_seen": 131072000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00048127128263337115, + "loss": 5.8198, + "theoretical_loss": 4.594911085507325, + "tokens_seen": 132120576 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004810821036700719, + "loss": 5.7462, + "theoretical_loss": 4.5900778029393425, + "tokens_seen": 133169152 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004808929247067726, + "loss": 5.7185, + "theoretical_loss": 4.585292989925557, + "tokens_seen": 134217728 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.4873538017272949, + "objective/train/docs_used": 86934, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.74575138092041, + "objective/train/original_loss": 5.74575138092041, + "objective/train/theoretical_loss": 4.584698253288433, + "objective/train/tokens_used": 154808800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24090476334095, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499600172042847, + "objective/train/weighted_lm_loss": 6.0322418212890625, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9532478451728821, + "theoretical_loss": 4.584698253288433, + "tokens_seen": 134348800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004807037457434733, + "loss": 5.7085, + "theoretical_loss": 4.580555787387068, + "tokens_seen": 135266304 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004805145667801741, + "loss": 5.6594, + "theoretical_loss": 4.575865357980726, + "tokens_seen": 136314880 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004803253878168748, + "loss": 5.6742, + "theoretical_loss": 4.571220885387013, + "tokens_seen": 137363456 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.4891941249370575, + "objective/train/docs_used": 89023, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.547139644622803, + "objective/train/original_loss": 5.5471391677856445, + "objective/train/theoretical_loss": 4.570066854274275, + "objective/train/tokens_used": 158085600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.241221621632576, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501456260681152, + "objective/train/weighted_lm_loss": 5.825089931488037, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9656258821487427, + "theoretical_loss": 4.570066854274275, + "tokens_seen": 137625600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00048013620885357553, + "loss": 5.6366, + "theoretical_loss": 4.566621573626489, + "tokens_seen": 138412032 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004799470298902762, + "loss": 5.6921, + "theoretical_loss": 4.562066646403457, + "tokens_seen": 139460608 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004797578509269769, + "loss": 5.7071, + "theoretical_loss": 4.557555346475546, + "tokens_seen": 140509184 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.4865155816078186, + "objective/train/docs_used": 91007, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.0053534507751465, + "objective/train/original_loss": 5.005353927612305, + "objective/train/theoretical_loss": 4.5558747052997965, + "objective/train/tokens_used": 161362400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24105681478977203, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498768091201782, + "objective/train/weighted_lm_loss": 5.255599498748779, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9540678262710571, + "theoretical_loss": 4.5558747052997965, + "tokens_seen": 140902400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00047956867196367767, + "loss": 5.6781, + "theoretical_loss": 4.553086935048029, + "tokens_seen": 141557760 + }, + { + "epoch": 0.05, + "learning_rate": 0.00047937949300037836, + "loss": 5.7415, + "theoretical_loss": 4.5486606911917145, + "tokens_seen": 142606336 + }, + { + "epoch": 0.05, + "learning_rate": 0.00047919031403707905, + "loss": 5.6154, + "theoretical_loss": 4.544275911283326, + "tokens_seen": 143654912 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.4804496467113495, + "objective/train/docs_used": 93093, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.170466899871826, + "objective/train/original_loss": 5.170466423034668, + "objective/train/theoretical_loss": 4.542098855026559, + "objective/train/tokens_used": 164639200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2387777715921402, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492587089538574, + "objective/train/weighted_lm_loss": 5.424218654632568, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9516282677650452, + "theoretical_loss": 4.542098855026559, + "tokens_seen": 144179200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004790011350737798, + "loss": 5.6278, + "theoretical_loss": 4.539931908467359, + "tokens_seen": 144703488 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004788119561104805, + "loss": 5.656, + "theoretical_loss": 4.53562801213843, + "tokens_seen": 145752064 + }, + { + "epoch": 0.05, + "learning_rate": 0.00047862277714718124, + "loss": 5.6628, + "theoretical_loss": 4.531363567443194, + "tokens_seen": 146800640 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.4876106083393097, + "objective/train/docs_used": 95084, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.640732765197754, + "objective/train/original_loss": 5.640732288360596, + "objective/train/theoretical_loss": 4.5287180387305765, + "objective/train/tokens_used": 167916000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2436685711145401, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049999713897705, + "objective/train/weighted_lm_loss": 5.9231648445129395, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9515953063964844, + "theoretical_loss": 4.5287180387305765, + "tokens_seen": 147456000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00047843359818388194, + "loss": 5.642, + "theoretical_loss": 4.527137934800969, + "tokens_seen": 147849216 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004782444192205827, + "loss": 5.5955, + "theoretical_loss": 4.522950489442225, + "tokens_seen": 148897792 + }, + { + "epoch": 0.05, + "learning_rate": 0.00047805524025728343, + "loss": 5.5843, + "theoretical_loss": 4.518800620964164, + "tokens_seen": 149946368 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.4914400279521942, + "objective/train/docs_used": 96518, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.291038513183594, + "objective/train/original_loss": 5.291038513183594, + "objective/train/theoretical_loss": 4.515712520110756, + "objective/train/tokens_used": 171192800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24376289546489716, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.05038321018219, + "objective/train/weighted_lm_loss": 5.557369709014893, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9825595021247864, + "theoretical_loss": 4.515712520110756, + "tokens_seen": 150732800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004778660612939841, + "loss": 5.5918, + "theoretical_loss": 4.514687732902628, + "tokens_seen": 150994944 + }, + { + "epoch": 0.05, + "learning_rate": 0.00047767688233068487, + "loss": 5.6056, + "theoretical_loss": 4.510611242319637, + "tokens_seen": 152043520 + }, + { + "epoch": 0.05, + "learning_rate": 0.00047748770336738557, + "loss": 5.6134, + "theoretical_loss": 4.506570579405888, + "tokens_seen": 153092096 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.4901806712150574, + "objective/train/docs_used": 98118, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.289200782775879, + "objective/train/original_loss": 5.289201736450195, + "objective/train/theoretical_loss": 4.503063951009098, + "objective/train/tokens_used": 174469600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24192149937152863, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502479076385498, + "objective/train/weighted_lm_loss": 5.555331707000732, + "objective/train/weights_max": 1.0512194633483887, + "objective/train/weights_min": 0.9558950066566467, + "theoretical_loss": 4.503063951009098, + "tokens_seen": 154009600 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047729852440408626, + "loss": 5.6166, + "theoretical_loss": 4.502565187097554, + "tokens_seen": 154140672 + }, + { + "epoch": 0.06, + "learning_rate": 0.000477109345440787, + "loss": 5.5655, + "theoretical_loss": 4.498594520706801, + "tokens_seen": 155189248 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004769201664774877, + "loss": 5.5982, + "theoretical_loss": 4.494658047565416, + "tokens_seen": 156237824 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.4884991943836212, + "objective/train/docs_used": 100657, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.095714569091797, + "objective/train/original_loss": 5.095714092254639, + "objective/train/theoretical_loss": 4.490755246681026, + "objective/train/tokens_used": 177746400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2406940460205078, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500733852386475, + "objective/train/weighted_lm_loss": 5.351465225219727, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9665477275848389, + "theoretical_loss": 4.490755246681026, + "tokens_seen": 157286400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004767309875141884, + "loss": 5.6162, + "theoretical_loss": 4.490755246681026, + "tokens_seen": 157286400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047654180855088914, + "loss": 5.5682, + "theoretical_loss": 4.48688560840535, + "tokens_seen": 158334976 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047635262958758983, + "loss": 5.5178, + "theoretical_loss": 4.483048634114016, + "tokens_seen": 159383552 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004761634506242906, + "loss": 5.5363, + "theoretical_loss": 4.479243835897444, + "tokens_seen": 160432128 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.48170924186706543, + "objective/train/docs_used": 101862, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.535853862762451, + "objective/train/original_loss": 5.535854339599609, + "objective/train/theoretical_loss": 4.478770474607726, + "objective/train/tokens_used": 181023200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23878274857997894, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049384593963623, + "objective/train/weighted_lm_loss": 5.808737754821777, + "objective/train/weights_max": 1.0512193441390991, + "objective/train/weights_min": 0.9529873728752136, + "theoretical_loss": 4.478770474607726, + "tokens_seen": 160563200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047597427166099133, + "loss": 5.5977, + "theoretical_loss": 4.475470736262361, + "tokens_seen": 161480704 + }, + { + "epoch": 0.06, + "learning_rate": 0.000475785092697692, + "loss": 5.5844, + "theoretical_loss": 4.471728867843497, + "tokens_seen": 162529280 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047559591373439277, + "loss": 5.5151, + "theoretical_loss": 4.4680177731250765, + "tokens_seen": 163577856 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.47214475274086, + "objective/train/docs_used": 104000, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.165049076080322, + "objective/train/original_loss": 5.165048599243164, + "objective/train/theoretical_loss": 4.467094755136979, + "objective/train/tokens_used": 184300000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22914351522922516, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0483790636062622, + "objective/train/weighted_lm_loss": 5.412636756896973, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9813255667686462, + "theoretical_loss": 4.467094755136979, + "tokens_seen": 163840000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047540673477109346, + "loss": 5.4748, + "theoretical_loss": 4.464337004171679, + "tokens_seen": 164626432 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004752175558077942, + "loss": 5.4863, + "theoretical_loss": 4.460686122368132, + "tokens_seen": 165675008 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004750283768444949, + "loss": 5.5302, + "theoretical_loss": 4.457064698168051, + "tokens_seen": 166723584 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.4795248210430145, + "objective/train/docs_used": 106106, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.443030834197998, + "objective/train/original_loss": 5.443031311035156, + "objective/train/theoretical_loss": 4.455714172485305, + "objective/train/tokens_used": 187576800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23740240931510925, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491594076156616, + "objective/train/weighted_lm_loss": 5.712173938751221, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9545544385910034, + "theoretical_loss": 4.455714172485305, + "tokens_seen": 167116800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004748391978811956, + "loss": 5.4637, + "theoretical_loss": 4.453472310850701, + "tokens_seen": 167772160 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047465001891789635, + "loss": 5.4558, + "theoretical_loss": 4.449908548285846, + "tokens_seen": 168820736 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047446083995459704, + "loss": 5.4025, + "theoretical_loss": 4.446373006706281, + "tokens_seen": 169869312 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.4749578535556793, + "objective/train/docs_used": 107805, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.134139060974121, + "objective/train/original_loss": 5.134139537811279, + "objective/train/theoretical_loss": 4.44461569484119, + "objective/train/tokens_used": 190853600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2378183901309967, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487042665481567, + "objective/train/weighted_lm_loss": 5.386655330657959, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9524388313293457, + "theoretical_loss": 4.44461569484119, + "tokens_seen": 170393600 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047427166099129773, + "loss": 5.4333, + "theoretical_loss": 4.442865290487752, + "tokens_seen": 170917888 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004740824820279985, + "loss": 5.3058, + "theoretical_loss": 4.439385011935977, + "tokens_seen": 171966464 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004738933030646992, + "loss": 5.412, + "theoretical_loss": 4.435931791080489, + "tokens_seen": 173015040 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.4863138794898987, + "objective/train/docs_used": 109422, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.005147457122803, + "objective/train/original_loss": 5.005147933959961, + "objective/train/theoretical_loss": 4.433787102483406, + "objective/train/tokens_used": 194130400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24057146906852722, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049854040145874, + "objective/train/weighted_lm_loss": 5.255906105041504, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9543647170066833, + "theoretical_loss": 4.433787102483406, + "tokens_seen": 173670400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004737041241014, + "loss": 5.3846, + "theoretical_loss": 4.43250525547506, + "tokens_seen": 174063616 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047351494513810067, + "loss": 5.4827, + "theoretical_loss": 4.429105040004445, + "tokens_seen": 175112192 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047332576617480136, + "loss": 5.4385, + "theoretical_loss": 4.4257307866972155, + "tokens_seen": 176160768 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.474359929561615, + "objective/train/docs_used": 111276, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.441802024841309, + "objective/train/original_loss": 5.441802024841309, + "objective/train/theoretical_loss": 4.42321692297592, + "objective/train/tokens_used": 197407200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23385858535766602, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486245155334473, + "objective/train/weighted_lm_loss": 5.70626163482666, + "objective/train/weights_max": 1.051215410232544, + "objective/train/weights_min": 0.9550078511238098, + "theoretical_loss": 4.42321692297592, + "tokens_seen": 176947200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004731365872115021, + "loss": 5.5213, + "theoretical_loss": 4.422382144544446, + "tokens_seen": 177209344 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004729474082482028, + "loss": 5.426, + "theoretical_loss": 4.419058769324055, + "tokens_seen": 178257920 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047275822928490355, + "loss": 5.4245, + "theoretical_loss": 4.415760323430568, + "tokens_seen": 179306496 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.49394339323043823, + "objective/train/docs_used": 113383, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.963852882385254, + "objective/train/original_loss": 4.963852882385254, + "objective/train/theoretical_loss": 4.412894372625901, + "objective/train/tokens_used": 200684000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24598737061023712, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506449937820435, + "objective/train/weighted_lm_loss": 5.2148284912109375, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9575862288475037, + "theoretical_loss": 4.412894372625901, + "tokens_seen": 180224000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047256905032160425, + "loss": 5.4082, + "theoretical_loss": 4.412486475710132, + "tokens_seen": 180355072 + }, + { + "epoch": 0.06, + "learning_rate": 0.00047237987135830494, + "loss": 5.3975, + "theoretical_loss": 4.409236901300563, + "tokens_seen": 181403648 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004721906923950057, + "loss": 5.3623, + "theoretical_loss": 4.406011281476267, + "tokens_seen": 182452224 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.48171699047088623, + "objective/train/docs_used": 115260, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.099734306335449, + "objective/train/original_loss": 5.099733829498291, + "objective/train/theoretical_loss": 4.40280930349784, + "objective/train/tokens_used": 203960800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2392299622297287, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049387812614441, + "objective/train/weighted_lm_loss": 5.351738929748535, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9590397477149963, + "theoretical_loss": 4.40280930349784, + "tokens_seen": 183500800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004720015134317064, + "loss": 5.4224, + "theoretical_loss": 4.40280930349784, + "tokens_seen": 183500800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004718123344684071, + "loss": 5.4357, + "theoretical_loss": 4.3996306604662, + "tokens_seen": 184549376 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004716231555051078, + "loss": 5.431, + "theoretical_loss": 4.396475051181074, + "tokens_seen": 185597952 + }, + { + "epoch": 0.07, + "learning_rate": 0.00047143397654180857, + "loss": 5.3839, + "theoretical_loss": 4.393342180003689, + "tokens_seen": 186646528 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.4751421809196472, + "objective/train/docs_used": 117211, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.888144493103027, + "objective/train/original_loss": 4.888144493103027, + "objective/train/theoretical_loss": 4.392952155367621, + "objective/train/tokens_used": 207237600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24074752628803253, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487377643585205, + "objective/train/weighted_lm_loss": 5.126180648803711, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9516778588294983, + "theoretical_loss": 4.392952155367621, + "tokens_seen": 186777600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004712447975785093, + "loss": 5.3899, + "theoretical_loss": 4.390231756723523, + "tokens_seen": 187695104 + }, + { + "epoch": 0.07, + "learning_rate": 0.00047105561861521, + "loss": 5.3971, + "theoretical_loss": 4.387143496428978, + "tokens_seen": 188743680 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004708664396519107, + "loss": 5.4104, + "theoretical_loss": 4.384077119381821, + "tokens_seen": 189792256 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.4631696939468384, + "objective/train/docs_used": 119241, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.791223526000977, + "objective/train/original_loss": 4.791223526000977, + "objective/train/theoretical_loss": 4.383313912078293, + "objective/train/tokens_used": 210514400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23190495371818542, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0474956035614014, + "objective/train/weighted_lm_loss": 5.014393329620361, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9541125297546387, + "theoretical_loss": 4.383313912078293, + "tokens_seen": 190054400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00047067726068861145, + "loss": 5.4451, + "theoretical_loss": 4.381032350895292, + "tokens_seen": 190840832 + }, + { + "epoch": 0.07, + "learning_rate": 0.00047048808172531214, + "loss": 5.3872, + "theoretical_loss": 4.378008921215717, + "tokens_seen": 191889408 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004702989027620129, + "loss": 5.3645, + "theoretical_loss": 4.375006565407541, + "tokens_seen": 192937984 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.4861815869808197, + "objective/train/docs_used": 121008, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.046594619750977, + "objective/train/original_loss": 5.046595096588135, + "objective/train/theoretical_loss": 4.373886061826036, + "objective/train/tokens_used": 213791200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2395814061164856, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498359203338623, + "objective/train/weighted_lm_loss": 5.298506259918213, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9604629278182983, + "theoretical_loss": 4.373886061826036, + "tokens_seen": 193331200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004701097237987136, + "loss": 5.3806, + "theoretical_loss": 4.372025023241637, + "tokens_seen": 193986560 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004699205448354143, + "loss": 5.4043, + "theoretical_loss": 4.3690640390867985, + "tokens_seen": 195035136 + }, + { + "epoch": 0.07, + "learning_rate": 0.000469731365872115, + "loss": 5.2706, + "theoretical_loss": 4.366123361804301, + "tokens_seen": 196083712 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.49126356840133667, + "objective/train/docs_used": 122985, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.289262771606445, + "objective/train/original_loss": 5.289262294769287, + "objective/train/theoretical_loss": 4.364660560962464, + "objective/train/tokens_used": 217068000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2440873384475708, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503672361373901, + "objective/train/weighted_lm_loss": 5.555089473724365, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9654108881950378, + "theoretical_loss": 4.364660560962464, + "tokens_seen": 196608000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004695421869088157, + "loss": 5.2876, + "theoretical_loss": 4.363202744645427, + "tokens_seen": 197132288 + }, + { + "epoch": 0.07, + "learning_rate": 0.00046935300794551647, + "loss": 5.3211, + "theoretical_loss": 4.360301945151863, + "tokens_seen": 198180864 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004691638289822172, + "loss": 5.2849, + "theoretical_loss": 4.357420725058867, + "tokens_seen": 199229440 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.4830451011657715, + "objective/train/docs_used": 124865, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.945188522338867, + "objective/train/original_loss": 4.945188522338867, + "objective/train/theoretical_loss": 4.355629800949043, + "objective/train/tokens_used": 220344800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23967084288597107, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495227575302124, + "objective/train/weighted_lm_loss": 5.192099094390869, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.952029287815094, + "theoretical_loss": 4.355629800949043, + "tokens_seen": 199884800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004689746500189179, + "loss": 5.3328, + "theoretical_loss": 4.354558850201118, + "tokens_seen": 200278016 + }, + { + "epoch": 0.07, + "learning_rate": 0.00046878547105561866, + "loss": 5.2951, + "theoretical_loss": 4.351716090421165, + "tokens_seen": 201326592 + }, + { + "epoch": 0.07, + "learning_rate": 0.00046859629209231935, + "loss": 5.2508, + "theoretical_loss": 4.348892219480378, + "tokens_seen": 202375168 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.47592583298683167, + "objective/train/docs_used": 126478, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.113393783569336, + "objective/train/original_loss": 5.113393783569336, + "objective/train/theoretical_loss": 4.3467865781424315, + "objective/train/tokens_used": 223621600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24106524884700775, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04881751537323, + "objective/train/weighted_lm_loss": 5.362763404846191, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9515713453292847, + "theoretical_loss": 4.3467865781424315, + "tokens_seen": 203161600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00046840711312902004, + "loss": 5.2435, + "theoretical_loss": 4.346087014972328, + "tokens_seen": 203423744 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004682179341657208, + "loss": 5.3514, + "theoretical_loss": 4.343300258238523, + "tokens_seen": 204472320 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004680287552024215, + "loss": 5.286, + "theoretical_loss": 4.34053173428641, + "tokens_seen": 205520896 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.4761151969432831, + "objective/train/docs_used": 128257, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.176480770111084, + "objective/train/original_loss": 5.176480770111084, + "objective/train/theoretical_loss": 4.33812406612692, + "objective/train/tokens_used": 226898400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2329566329717636, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048795461654663, + "objective/train/weighted_lm_loss": 5.43222713470459, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9516546130180359, + "theoretical_loss": 4.33812406612692, + "tokens_seen": 206438400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00046783957623912223, + "loss": 5.3401, + "theoretical_loss": 4.337781231709587, + "tokens_seen": 206569472 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004676503972758229, + "loss": 5.3552, + "theoretical_loss": 4.3350485426101395, + "tokens_seen": 207618048 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004674612183125236, + "loss": 5.2876, + "theoretical_loss": 4.332333462523044, + "tokens_seen": 208666624 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.48480576276779175, + "objective/train/docs_used": 130307, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.197201728820801, + "objective/train/original_loss": 5.197201728820801, + "objective/train/theoretical_loss": 4.3296357903425715, + "objective/train/tokens_used": 230175200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.240909144282341, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049705147743225, + "objective/train/weighted_lm_loss": 5.455105781555176, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9520548582077026, + "theoretical_loss": 4.3296357903425715, + "tokens_seen": 209715200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00046727203934922437, + "loss": 5.273, + "theoretical_loss": 4.3296357903425715, + "tokens_seen": 209715200 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046708286038592506, + "loss": 5.3009, + "theoretical_loss": 4.326955328250631, + "tokens_seen": 210763776 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046689368142262586, + "loss": 5.1924, + "theoretical_loss": 4.324291881646978, + "tokens_seen": 211812352 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046670450245932656, + "loss": 5.2265, + "theoretical_loss": 4.321645259081256, + "tokens_seen": 212860928 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.48363375663757324, + "objective/train/docs_used": 132385, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.02983283996582, + "objective/train/original_loss": 5.02983283996582, + "objective/train/theoretical_loss": 4.321315604786012, + "objective/train/tokens_used": 233452000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23789982497692108, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495727062225342, + "objective/train/weighted_lm_loss": 5.278566360473633, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9722732901573181, + "theoretical_loss": 4.321315604786012, + "tokens_seen": 212992000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046651532349602725, + "loss": 5.2079, + "theoretical_loss": 4.3190152721867925, + "tokens_seen": 213909504 + }, + { + "epoch": 0.08, + "learning_rate": 0.000466326144532728, + "loss": 5.2469, + "theoretical_loss": 4.3164017356160995, + "tokens_seen": 214958080 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004661369655694287, + "loss": 5.1772, + "theoretical_loss": 4.313804466978039, + "tokens_seen": 216006656 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.48313969373703003, + "objective/train/docs_used": 133646, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.160606861114502, + "objective/train/original_loss": 5.160606384277344, + "objective/train/theoretical_loss": 4.313157670585552, + "objective/train/tokens_used": 236728800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2380112111568451, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495237112045288, + "objective/train/weighted_lm_loss": 5.417488098144531, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9516239166259766, + "theoretical_loss": 4.313157670585552, + "tokens_seen": 216268800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004659477866061294, + "loss": 5.2124, + "theoretical_loss": 4.311223286776586, + "tokens_seen": 217055232 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046575860764283013, + "loss": 5.292, + "theoretical_loss": 4.3086580183511565, + "tokens_seen": 218103808 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004655694286795308, + "loss": 5.2061, + "theoretical_loss": 4.306108487818438, + "tokens_seen": 219152384 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.48529815673828125, + "objective/train/docs_used": 136163, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.18554162979126, + "objective/train/original_loss": 5.18554162979126, + "objective/train/theoretical_loss": 4.305156436273988, + "objective/train/tokens_used": 240005600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24352312088012695, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497677326202393, + "objective/train/weighted_lm_loss": 5.44387149810791, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9514811038970947, + "theoretical_loss": 4.305156436273988, + "tokens_seen": 219545600 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046538024971623157, + "loss": 5.2285, + "theoretical_loss": 4.3035745240156915, + "tokens_seen": 220200960 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046519107075293227, + "loss": 5.2309, + "theoretical_loss": 4.301055958445467, + "tokens_seen": 221249536 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046500189178963296, + "loss": 5.2038, + "theoretical_loss": 4.2985526252217054, + "tokens_seen": 222298112 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.4832799434661865, + "objective/train/docs_used": 137875, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.737259864807129, + "objective/train/original_loss": 4.737259387969971, + "objective/train/theoretical_loss": 4.297306619601446, + "objective/train/tokens_used": 243282400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23784860968589783, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049536943435669, + "objective/train/weighted_lm_loss": 4.9707770347595215, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9518852829933167, + "theoretical_loss": 4.297306619601446, + "tokens_seen": 222822400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004648127128263337, + "loss": 5.2808, + "theoretical_loss": 4.296064361017181, + "tokens_seen": 223346688 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046462353386303445, + "loss": 5.2182, + "theoretical_loss": 4.293591005012228, + "tokens_seen": 224395264 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004644343548997352, + "loss": 5.2337, + "theoretical_loss": 4.291132398844749, + "tokens_seen": 225443840 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.4908119738101959, + "objective/train/docs_used": 139547, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.02741003036499, + "objective/train/original_loss": 5.027409553527832, + "objective/train/theoretical_loss": 4.289603190747359, + "objective/train/tokens_used": 246559200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24388805031776428, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503208637237549, + "objective/train/weighted_lm_loss": 5.2804741859436035, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.951794445514679, + "theoretical_loss": 4.289603190747359, + "tokens_seen": 226099200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004642451759364359, + "loss": 5.2597, + "theoretical_loss": 4.2886883865614305, + "tokens_seen": 226492416 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004640559969731366, + "loss": 5.2596, + "theoretical_loss": 4.286258814570154, + "tokens_seen": 227540992 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046386681800983734, + "loss": 5.287, + "theoretical_loss": 4.283843531593567, + "tokens_seen": 228589568 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.4877687394618988, + "objective/train/docs_used": 141330, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.088724136352539, + "objective/train/original_loss": 5.088723182678223, + "objective/train/theoretical_loss": 4.282041356805376, + "objective/train/tokens_used": 249836000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24049794673919678, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049999475479126, + "objective/train/weighted_lm_loss": 5.342431545257568, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9596543908119202, + "theoretical_loss": 4.282041356805376, + "tokens_seen": 229376000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046367763904653803, + "loss": 5.2102, + "theoretical_loss": 4.281442388623764, + "tokens_seen": 229638144 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004634884600832387, + "loss": 5.1973, + "theoretical_loss": 4.279055238878065, + "tokens_seen": 230686720 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046329928111993947, + "loss": 5.1969, + "theoretical_loss": 4.276681937755853, + "tokens_seen": 231735296 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.4821692407131195, + "objective/train/docs_used": 143449, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.025097370147705, + "objective/train/original_loss": 5.025097846984863, + "objective/train/theoretical_loss": 4.274616547428058, + "objective/train/tokens_used": 253112800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23833133280277252, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494284629821777, + "objective/train/weighted_lm_loss": 5.27445125579834, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9557837247848511, + "theoretical_loss": 4.274616547428058, + "tokens_seen": 232652800 + }, + { + "epoch": 0.08, + "learning_rate": 0.00046311010215664016, + "loss": 5.242, + "theoretical_loss": 4.274322342796429, + "tokens_seen": 232783872 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004629209231933409, + "loss": 5.2641, + "theoretical_loss": 4.271976313637885, + "tokens_seen": 233832448 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004627317442300416, + "loss": 5.1358, + "theoretical_loss": 4.269643711976926, + "tokens_seen": 234881024 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.46489331126213074, + "objective/train/docs_used": 144727, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.503349781036377, + "objective/train/original_loss": 4.503350257873535, + "objective/train/theoretical_loss": 4.267324401529657, + "objective/train/tokens_used": 256389600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22985953092575073, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0476576089859009, + "objective/train/weighted_lm_loss": 4.7209906578063965, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9633262157440186, + "theoretical_loss": 4.267324401529657, + "tokens_seen": 235929600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004625425652667423, + "loss": 5.1814, + "theoretical_loss": 4.267324401529657, + "tokens_seen": 235929600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004623533863034431, + "loss": 5.1582, + "theoretical_loss": 4.265018247993272, + "tokens_seen": 236978176 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004621642073401438, + "loss": 5.133, + "theoretical_loss": 4.262725119008646, + "tokens_seen": 238026752 + }, + { + "epoch": 0.09, + "learning_rate": 0.00046197502837684454, + "loss": 5.17, + "theoretical_loss": 4.260444884123785, + "tokens_seen": 239075328 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.48313987255096436, + "objective/train/docs_used": 146731, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.778027534484863, + "objective/train/original_loss": 4.778027534484863, + "objective/train/theoretical_loss": 4.260160754955504, + "objective/train/tokens_used": 259666400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24219833314418793, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495449304580688, + "objective/train/weighted_lm_loss": 5.0156965255737305, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9516287446022034, + "theoretical_loss": 4.260160754955504, + "tokens_seen": 239206400 + }, + { + "epoch": 0.09, + "learning_rate": 0.00046178584941354524, + "loss": 5.1438, + "theoretical_loss": 4.258177414758135, + "tokens_seen": 240123904 + }, + { + "epoch": 0.09, + "learning_rate": 0.00046159667045024593, + "loss": 5.2066, + "theoretical_loss": 4.25592258416769, + "tokens_seen": 241172480 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004614074914869467, + "loss": 5.1412, + "theoretical_loss": 4.253680267410921, + "tokens_seen": 242221056 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.48830899596214294, + "objective/train/docs_used": 148718, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.019001483917236, + "objective/train/original_loss": 5.019001007080078, + "objective/train/theoretical_loss": 4.253121629035574, + "objective/train/tokens_used": 262943200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2425418645143509, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500637292861938, + "objective/train/weighted_lm_loss": 5.270970344543457, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9595874547958374, + "theoretical_loss": 4.253121629035574, + "tokens_seen": 242483200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00046121831252364737, + "loss": 5.2042, + "theoretical_loss": 4.251450341315464, + "tokens_seen": 243269632 + }, + { + "epoch": 0.09, + "learning_rate": 0.00046102913356034806, + "loss": 5.1482, + "theoretical_loss": 4.249232684445579, + "tokens_seen": 244318208 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004608399545970488, + "loss": 5.1712, + "theoretical_loss": 4.247027177070329, + "tokens_seen": 245366784 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.4830223023891449, + "objective/train/docs_used": 150524, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.851802825927734, + "objective/train/original_loss": 4.851802825927734, + "objective/train/theoretical_loss": 4.246203219947814, + "objective/train/tokens_used": 266220000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24083028733730316, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049526333808899, + "objective/train/weighted_lm_loss": 5.092092990875244, + "objective/train/weights_max": 1.0512189865112305, + "objective/train/weights_min": 0.9516807198524475, + "theoretical_loss": 4.246203219947814, + "tokens_seen": 245760000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004606507756337495, + "loss": 5.1937, + "theoretical_loss": 4.24483370113249, + "tokens_seen": 246415360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00046046159667045025, + "loss": 5.1309, + "theoretical_loss": 4.242652140218147, + "tokens_seen": 247463936 + }, + { + "epoch": 0.09, + "learning_rate": 0.00046027241770715095, + "loss": 5.1674, + "theoretical_loss": 4.240482379526973, + "tokens_seen": 248512512 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.48063239455223083, + "objective/train/docs_used": 152460, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.242099761962891, + "objective/train/original_loss": 5.242099285125732, + "objective/train/theoretical_loss": 4.2394018888240215, + "objective/train/tokens_used": 269496800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23866912722587585, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492764711380005, + "objective/train/weighted_lm_loss": 5.502182483673096, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9536293148994446, + "theoretical_loss": 4.2394018888240215, + "tokens_seen": 249036800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004600832387438517, + "loss": 5.155, + "theoretical_loss": 4.2383243058431646, + "tokens_seen": 249561088 + }, + { + "epoch": 0.09, + "learning_rate": 0.00045989405978055244, + "loss": 5.1044, + "theoretical_loss": 4.23617780750703, + "tokens_seen": 250609664 + }, + { + "epoch": 0.09, + "learning_rate": 0.00045970488081725313, + "loss": 5.1227, + "theoretical_loss": 4.23404277438719, + "tokens_seen": 251658240 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.4906507432460785, + "objective/train/docs_used": 154232, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.827680587768555, + "objective/train/original_loss": 4.827680587768555, + "objective/train/theoretical_loss": 4.232714152537391, + "objective/train/tokens_used": 272773600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24195529520511627, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502949953079224, + "objective/train/weighted_lm_loss": 5.070379257202148, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9697064161300659, + "theoretical_loss": 4.232714152537391, + "tokens_seen": 252313600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004595157018539539, + "loss": 5.0412, + "theoretical_loss": 4.231919097853398, + "tokens_seen": 252706816 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004593265228906546, + "loss": 5.0234, + "theoretical_loss": 4.2298066707499515, + "tokens_seen": 253755392 + }, + { + "epoch": 0.09, + "learning_rate": 0.00045913734392735527, + "loss": 5.1059, + "theoretical_loss": 4.227705387369683, + "tokens_seen": 254803968 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.48569464683532715, + "objective/train/docs_used": 156109, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.89478063583374, + "objective/train/original_loss": 4.894780158996582, + "objective/train/theoretical_loss": 4.226136675116626, + "objective/train/tokens_used": 276050400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24085675179958344, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497938394546509, + "objective/train/weighted_lm_loss": 5.138565540313721, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9536014795303345, + "theoretical_loss": 4.226136675116626, + "tokens_seen": 255590400 + }, + { + "epoch": 0.09, + "learning_rate": 0.000458948164964056, + "loss": 5.1322, + "theoretical_loss": 4.225615143428513, + "tokens_seen": 255852544 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004587589860007567, + "loss": 5.0152, + "theoretical_loss": 4.223535836040548, + "tokens_seen": 256901120 + }, + { + "epoch": 0.09, + "learning_rate": 0.00045856980703745746, + "loss": 5.0449, + "theoretical_loss": 4.221467363693727, + "tokens_seen": 257949696 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.4772140085697174, + "objective/train/docs_used": 158136, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.774804592132568, + "objective/train/original_loss": 4.774805068969727, + "objective/train/theoretical_loss": 4.219666259736535, + "objective/train/tokens_used": 279327200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2351672351360321, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489166975021362, + "objective/train/weighted_lm_loss": 5.007704257965088, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9616609215736389, + "theoretical_loss": 4.219666259736535, + "tokens_seen": 258867200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00045838062807415815, + "loss": 4.8683, + "theoretical_loss": 4.219409626225975, + "tokens_seen": 258998272 + }, + { + "epoch": 0.09, + "learning_rate": 0.00045819144911085884, + "loss": 5.0815, + "theoretical_loss": 4.217362524801874, + "tokens_seen": 260046848 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004580022701475596, + "loss": 5.0467, + "theoretical_loss": 4.215325961889821, + "tokens_seen": 261095424 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.47997015714645386, + "objective/train/docs_used": 160243, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.758581161499023, + "objective/train/original_loss": 4.758580684661865, + "objective/train/theoretical_loss": 4.213299841239684, + "objective/train/tokens_used": 282604000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2388996034860611, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492112636566162, + "objective/train/weighted_lm_loss": 4.9929280281066895, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9515440464019775, + "theoretical_loss": 4.213299841239684, + "tokens_seen": 262144000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00045781309118426034, + "loss": 4.9857, + "theoretical_loss": 4.213299841239684, + "tokens_seen": 262144000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00045762391222096103, + "loss": 4.9844, + "theoretical_loss": 4.211284067860909, + "tokens_seen": 263192576 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004574347332576618, + "loss": 4.9032, + "theoretical_loss": 4.209278548001103, + "tokens_seen": 264241152 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004572455542943625, + "loss": 4.9459, + "theoretical_loss": 4.207283189125054, + "tokens_seen": 265289728 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.4564594626426697, + "objective/train/docs_used": 161560, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.229607582092285, + "objective/train/original_loss": 4.229607582092285, + "objective/train/theoretical_loss": 4.20703447914773, + "objective/train/tokens_used": 285880800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24206334352493286, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.046875238418579, + "objective/train/weighted_lm_loss": 4.431016445159912, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9516088366508484, + "theoretical_loss": 4.20703447914773, + "tokens_seen": 265420800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004570563753310632, + "loss": 4.9257, + "theoretical_loss": 4.2052978998941954, + "tokens_seen": 266338304 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004568671963677639, + "loss": 4.9745, + "theoretical_loss": 4.203322590146491, + "tokens_seen": 267386880 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004566780174044646, + "loss": 4.8664, + "theoretical_loss": 4.2013571708767365, + "tokens_seen": 268435456 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.47331157326698303, + "objective/train/docs_used": 163521, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.584531307220459, + "objective/train/original_loss": 4.584530830383301, + "objective/train/theoretical_loss": 4.200867351124762, + "objective/train/tokens_used": 289157600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23217462003231049, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485113859176636, + "objective/train/weighted_lm_loss": 4.808891773223877, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9518424868583679, + "theoretical_loss": 4.200867351124762, + "tokens_seen": 268697600 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045648883844116536, + "loss": 4.873, + "theoretical_loss": 4.199401554217266, + "tokens_seen": 269484032 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045629965947786605, + "loss": 4.9196, + "theoretical_loss": 4.19745565341906, + "tokens_seen": 270532608 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004561104805145668, + "loss": 4.8289, + "theoretical_loss": 4.195519382833226, + "tokens_seen": 271581184 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.47363853454589844, + "objective/train/docs_used": 165284, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.5816330909729, + "objective/train/original_loss": 4.5816330909729, + "objective/train/theoretical_loss": 4.194795746858309, + "objective/train/tokens_used": 292434400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2301483005285263, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485334396362305, + "objective/train/weighted_lm_loss": 4.80542516708374, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.955842137336731, + "theoretical_loss": 4.194795746858309, + "tokens_seen": 271974400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004559213015512675, + "loss": 4.8413, + "theoretical_loss": 4.193592657892869, + "tokens_seen": 272629760 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004557321225879682, + "loss": 4.9962, + "theoretical_loss": 4.191675395095324, + "tokens_seen": 273678336 + }, + { + "epoch": 0.1, + "learning_rate": 0.000455542943624669, + "loss": 4.9632, + "theoretical_loss": 4.189767511984741, + "tokens_seen": 274726912 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.4624802768230438, + "objective/train/docs_used": 166979, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.361681938171387, + "objective/train/original_loss": 4.36168098449707, + "objective/train/theoretical_loss": 4.188817062326644, + "objective/train/tokens_used": 295711200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.244588240981102, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0474902391433716, + "objective/train/weighted_lm_loss": 4.574804306030273, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9531933069229126, + "theoretical_loss": 4.188817062326644, + "tokens_seen": 275251200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004553537646613697, + "loss": 5.0148, + "theoretical_loss": 4.187868927135035, + "tokens_seen": 275775488 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004551645856980704, + "loss": 4.896, + "theoretical_loss": 4.185979560133161, + "tokens_seen": 276824064 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004549754067347711, + "loss": 5.0797, + "theoretical_loss": 4.184099331562732, + "tokens_seen": 277872640 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.480182021856308, + "objective/train/docs_used": 168865, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.012778282165527, + "objective/train/original_loss": 5.012779235839844, + "objective/train/theoretical_loss": 4.182928794423724, + "objective/train/tokens_used": 298988000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23854205012321472, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049230694770813, + "objective/train/weighted_lm_loss": 5.2581329345703125, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9522960782051086, + "theoretical_loss": 4.182928794423724, + "tokens_seen": 278528000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004547862277714718, + "loss": 4.9065, + "theoretical_loss": 4.182228162987963, + "tokens_seen": 278921216 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045459704880817256, + "loss": 4.902, + "theoretical_loss": 4.18036597693793, + "tokens_seen": 279969792 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045440786984487326, + "loss": 4.9617, + "theoretical_loss": 4.178512696891136, + "tokens_seen": 281018368 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.4830773174762726, + "objective/train/docs_used": 170536, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.846111297607422, + "objective/train/original_loss": 4.846111297607422, + "objective/train/theoretical_loss": 4.177128535915539, + "objective/train/tokens_used": 302264800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24183543026447296, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049536943435669, + "objective/train/weighted_lm_loss": 5.087429523468018, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9517684578895569, + "theoretical_loss": 4.177128535915539, + "tokens_seen": 281804800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045421869088157395, + "loss": 4.9619, + "theoretical_loss": 4.176668247260391, + "tokens_seen": 282066944 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004540295119182747, + "loss": 4.9715, + "theoretical_loss": 4.174832553377978, + "tokens_seen": 283115520 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004538403329549754, + "loss": 5.0139, + "theoretical_loss": 4.173005541481111, + "tokens_seen": 284164096 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.4926310181617737, + "objective/train/docs_used": 172212, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.892210483551025, + "objective/train/original_loss": 4.892210006713867, + "objective/train/theoretical_loss": 4.171413970703851, + "objective/train/tokens_used": 305541600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24494759738445282, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505083799362183, + "objective/train/weighted_lm_loss": 5.139614105224609, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9525986909866333, + "theoretical_loss": 4.171413970703851, + "tokens_seen": 285081600 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045365115399167614, + "loss": 4.9594, + "theoretical_loss": 4.1711871386976815, + "tokens_seen": 285212672 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045346197502837683, + "loss": 4.9551, + "theoretical_loss": 4.16937727303227, + "tokens_seen": 286261248 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004532727960650776, + "loss": 4.9784, + "theoretical_loss": 4.167575873352437, + "tokens_seen": 287309824 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.49215155839920044, + "objective/train/docs_used": 173965, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.988898754119873, + "objective/train/original_loss": 4.988898277282715, + "objective/train/theoretical_loss": 4.165782869375278, + "objective/train/tokens_used": 308818400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.244205504655838, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504565238952637, + "objective/train/weighted_lm_loss": 5.240926265716553, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.956809401512146, + "theoretical_loss": 4.165782869375278, + "tokens_seen": 288358400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004530836171017783, + "loss": 4.9721, + "theoretical_loss": 4.165782869375278, + "tokens_seen": 288358400 + }, + { + "epoch": 0.1, + "learning_rate": 0.000452894438138479, + "loss": 4.9523, + "theoretical_loss": 4.163998191654223, + "tokens_seen": 289406976 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004527052591751797, + "loss": 4.9486, + "theoretical_loss": 4.162221771566105, + "tokens_seen": 290455552 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045251608021188046, + "loss": 4.9565, + "theoretical_loss": 4.160453541298465, + "tokens_seen": 291504128 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.48595374822616577, + "objective/train/docs_used": 176091, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.911673069000244, + "objective/train/original_loss": 4.911673069000244, + "objective/train/theoretical_loss": 4.160233085015529, + "objective/train/tokens_used": 312095200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2406431883573532, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498186349868774, + "objective/train/weighted_lm_loss": 5.156929969787598, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9550408124923706, + "theoretical_loss": 4.160233085015529, + "tokens_seen": 291635200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00045232690124858115, + "loss": 4.9944, + "theoretical_loss": 4.158693433837098, + "tokens_seen": 292552704 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004521377222852819, + "loss": 4.9663, + "theoretical_loss": 4.156941382953835, + "tokens_seen": 293601280 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004519485433219826, + "loss": 4.899, + "theoretical_loss": 4.155197323194555, + "tokens_seen": 294649856 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.4910581409931183, + "objective/train/docs_used": 178137, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.6035237312316895, + "objective/train/original_loss": 4.603524208068848, + "objective/train/theoretical_loss": 4.154762549270199, + "objective/train/tokens_used": 315372000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24331238865852356, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503426790237427, + "objective/train/weighted_lm_loss": 4.835586071014404, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9553537964820862, + "theoretical_loss": 4.154762549270199, + "tokens_seen": 294912000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004517593643586833, + "loss": 4.9008, + "theoretical_loss": 4.153461189867425, + "tokens_seen": 295698432 + }, + { + "epoch": 0.11, + "learning_rate": 0.00045157018539538404, + "loss": 4.9441, + "theoretical_loss": 4.151732919031354, + "tokens_seen": 296747008 + }, + { + "epoch": 0.11, + "learning_rate": 0.00045138100643208473, + "loss": 4.9052, + "theoretical_loss": 4.150012447484665, + "tokens_seen": 297795584 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.4841580390930176, + "objective/train/docs_used": 180121, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.756556034088135, + "objective/train/original_loss": 4.756556510925293, + "objective/train/theoretical_loss": 4.149369268635046, + "objective/train/tokens_used": 318648800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24232666194438934, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496474504470825, + "objective/train/weighted_lm_loss": 4.992458820343018, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9520720839500427, + "theoretical_loss": 4.149369268635046, + "tokens_seen": 298188800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004511918274687855, + "loss": 4.8953, + "theoretical_loss": 4.148299712753977, + "tokens_seen": 298844160 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004510026485054862, + "loss": 4.8506, + "theoretical_loss": 4.146594653083293, + "tokens_seen": 299892736 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004508134695421869, + "loss": 4.9196, + "theoretical_loss": 4.144897207423284, + "tokens_seen": 300941312 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.4888658821582794, + "objective/train/docs_used": 182298, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.590531349182129, + "objective/train/original_loss": 4.590531349182129, + "objective/train/theoretical_loss": 4.144051320960009, + "objective/train/tokens_used": 321925600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2443903088569641, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501290559768677, + "objective/train/weighted_lm_loss": 4.820474624633789, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9515647888183594, + "theoretical_loss": 4.144051320960009, + "tokens_seen": 301465600 + }, + { + "epoch": 0.11, + "learning_rate": 0.00045062429057888767, + "loss": 4.8532, + "theoretical_loss": 4.143207315420783, + "tokens_seen": 301989888 + }, + { + "epoch": 0.11, + "learning_rate": 0.00045043511161558836, + "loss": 4.7713, + "theoretical_loss": 4.141524917408454, + "tokens_seen": 303038464 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004502459326522891, + "loss": 4.8744, + "theoretical_loss": 4.1398499543946565, + "tokens_seen": 304087040 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.48735523223876953, + "objective/train/docs_used": 183436, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.56964635848999, + "objective/train/original_loss": 4.569646835327148, + "objective/train/theoretical_loss": 4.138806852152502, + "objective/train/tokens_used": 325202400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2419733703136444, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049965500831604, + "objective/train/weighted_lm_loss": 4.798412799835205, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9514796137809753, + "theoretical_loss": 4.138806852152502, + "tokens_seen": 304742400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004500567536889898, + "loss": 4.8162, + "theoretical_loss": 4.138182368053505, + "tokens_seen": 305135616 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004498675747256905, + "loss": 4.774, + "theoretical_loss": 4.136522100715087, + "tokens_seen": 306184192 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044967839576239124, + "loss": 4.7775, + "theoretical_loss": 4.134869095355876, + "tokens_seen": 307232768 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.4952714741230011, + "objective/train/docs_used": 185431, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.745488166809082, + "objective/train/original_loss": 4.745488166809082, + "objective/train/theoretical_loss": 4.133634073066595, + "objective/train/tokens_used": 328479200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2461199015378952, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0507783889770508, + "objective/train/weighted_lm_loss": 4.98633337020874, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9608399868011475, + "theoretical_loss": 4.133634073066595, + "tokens_seen": 308019200 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044948921679909194, + "loss": 4.852, + "theoretical_loss": 4.1332232955893105, + "tokens_seen": 308281344 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044930003783579263, + "loss": 4.8169, + "theoretical_loss": 4.131584645656535, + "tokens_seen": 309329920 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004491108588724934, + "loss": 4.7574, + "theoretical_loss": 4.129953090417319, + "tokens_seen": 310378496 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.4933873116970062, + "objective/train/docs_used": 187155, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.723814010620117, + "objective/train/original_loss": 4.723814010620117, + "objective/train/theoretical_loss": 4.128531256565763, + "objective/train/tokens_used": 331756000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24465975165367126, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505824089050293, + "objective/train/weighted_lm_loss": 4.962899684906006, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9555627107620239, + "theoretical_loss": 4.128531256565763, + "tokens_seen": 311296000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044892167990919407, + "loss": 4.7497, + "theoretical_loss": 4.128328575341129, + "tokens_seen": 311427072 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044873250094589487, + "loss": 4.7832, + "theoretical_loss": 4.12671104649836, + "tokens_seen": 312475648 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044854332198259557, + "loss": 4.7443, + "theoretical_loss": 4.125100450551725, + "tokens_seen": 313524224 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.4791731834411621, + "objective/train/docs_used": 189293, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.741052150726318, + "objective/train/original_loss": 4.741052150726318, + "objective/train/theoretical_loss": 4.123496734747793, + "objective/train/tokens_used": 335032800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24330000579357147, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491536855697632, + "objective/train/weighted_lm_loss": 4.974308967590332, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9513964653015137, + "theoretical_loss": 4.123496734747793, + "tokens_seen": 314572800 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044835414301929626, + "loss": 4.7864, + "theoretical_loss": 4.123496734747793, + "tokens_seen": 314572800 + }, + { + "epoch": 0.11, + "learning_rate": 0.000448164964055997, + "loss": 4.766, + "theoretical_loss": 4.121899846908677, + "tokens_seen": 315621376 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004479757850926977, + "loss": 4.7524, + "theoretical_loss": 4.120309735423871, + "tokens_seen": 316669952 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044778660612939845, + "loss": 4.7824, + "theoretical_loss": 4.118726349242221, + "tokens_seen": 317718528 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.48897671699523926, + "objective/train/docs_used": 191279, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.168673992156982, + "objective/train/original_loss": 4.168674468994141, + "objective/train/theoretical_loss": 4.118528896321316, + "objective/train/tokens_used": 338309600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24297229945659637, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501326322555542, + "objective/train/weighted_lm_loss": 4.377741813659668, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9516782164573669, + "theoretical_loss": 4.118528896321316, + "tokens_seen": 317849600 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044759742716609914, + "loss": 4.7589, + "theoretical_loss": 4.117149637864041, + "tokens_seen": 318767104 + }, + { + "epoch": 0.11, + "learning_rate": 0.00044740824820279983, + "loss": 4.7901, + "theoretical_loss": 4.115579551333372, + "tokens_seen": 319815680 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004472190692395006, + "loss": 4.8469, + "theoretical_loss": 4.114016040230357, + "tokens_seen": 320864256 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.4896620512008667, + "objective/train/docs_used": 193378, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.6810662746429443, + "objective/train/original_loss": 3.6810660362243652, + "objective/train/theoretical_loss": 4.113626184124224, + "objective/train/tokens_used": 341586400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2405911087989807, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501891374588013, + "objective/train/weighted_lm_loss": 3.866628646850586, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9788739681243896, + "theoretical_loss": 4.113626184124224, + "tokens_seen": 321126400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004470298902762013, + "loss": 4.7207, + "theoretical_loss": 4.112459055663768, + "tokens_seen": 321912832 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044684071131290197, + "loss": 4.8422, + "theoretical_loss": 4.110908549263647, + "tokens_seen": 322961408 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004466515323496027, + "loss": 4.7674, + "theoretical_loss": 4.109364473174075, + "tokens_seen": 324009984 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.4944951832294464, + "objective/train/docs_used": 195243, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.5325846672058105, + "objective/train/original_loss": 4.532584190368652, + "objective/train/theoretical_loss": 4.108787092774909, + "objective/train/tokens_used": 344863200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.246404230594635, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0507020950317383, + "objective/train/weighted_lm_loss": 4.762205600738525, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9522090554237366, + "theoretical_loss": 4.108787092774909, + "tokens_seen": 324403200 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044646235338630346, + "loss": 4.8064, + "theoretical_loss": 4.107826780046074, + "tokens_seen": 325058560 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004462731744230042, + "loss": 4.8592, + "theoretical_loss": 4.106295423030614, + "tokens_seen": 326107136 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004460839954597049, + "loss": 4.8731, + "theoretical_loss": 4.104770355771754, + "tokens_seen": 327155712 + }, + { + "debugging/Self-BLEU-5": 0.5365128506817183, + "debugging/distinct-1-grams": 0.7612814402327299, + "debugging/distinct-2-grams": 0.9694583753853511, + "debugging/entropy-1-grams": 6.003629944255698, + "debugging/entropy-2-grams": 7.054987089269872, + "debugging/length": 495.25, + "debugging/num_segments": 16, + "debugging/raw_token_scores_avg": 0.04385810345411301, + "debugging/raw_token_scores_std": 0.15687797963619232, + "epoch": 0.12, + "objective/train/advantage_avg": 0.45616579055786133, + "objective/train/docs_used": 197327, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.436026573181152, + "objective/train/original_loss": 4.436026573181152, + "objective/train/theoretical_loss": 4.10401016644798, + "objective/train/tokens_used": 348140000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23268143832683563, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.046798825263977, + "objective/train/weighted_lm_loss": 4.649384498596191, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.9514419436454773, + "theoretical_loss": 4.10401016644798, + "tokens_seen": 327680000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004458948164964056, + "loss": 4.7522, + "theoretical_loss": 4.103251532399884, + "tokens_seen": 328204288 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044570563753310635, + "loss": 4.8371, + "theoretical_loss": 4.101738907525098, + "tokens_seen": 329252864 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044551645856980704, + "loss": 4.8124, + "theoretical_loss": 4.100232436230659, + "tokens_seen": 330301440 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.4855183959007263, + "objective/train/docs_used": 199093, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.6984477043151855, + "objective/train/original_loss": 4.698448181152344, + "objective/train/theoretical_loss": 4.099293996766681, + "objective/train/tokens_used": 351416800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24312277138233185, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497876405715942, + "objective/train/weighted_lm_loss": 4.9305100440979, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9514390826225281, + "theoretical_loss": 4.099293996766681, + "tokens_seen": 330956800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004453272796065078, + "loss": 4.7747, + "theoretical_loss": 4.098732074066591, + "tokens_seen": 331350016 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004451381006432085, + "loss": 4.7442, + "theoretical_loss": 4.097237777043363, + "tokens_seen": 332398592 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004449489216799092, + "loss": 4.7144, + "theoretical_loss": 4.095749501625689, + "tokens_seen": 333447168 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.4659195840358734, + "objective/train/docs_used": 200392, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.83096170425415, + "objective/train/original_loss": 4.830961227416992, + "objective/train/theoretical_loss": 4.09463722080479, + "objective/train/tokens_used": 354693600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2294236719608307, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0477579832077026, + "objective/train/weighted_lm_loss": 5.058286190032959, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9520472288131714, + "theoretical_loss": 4.09463722080479, + "tokens_seen": 334233600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004447597427166099, + "loss": 4.719, + "theoretical_loss": 4.094267204726426, + "tokens_seen": 334495744 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004445705637533106, + "loss": 4.822, + "theoretical_loss": 4.092790843700574, + "tokens_seen": 335544320 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004443813847900113, + "loss": 4.7045, + "theoretical_loss": 4.091320376339368, + "tokens_seen": 336592896 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.48759955167770386, + "objective/train/docs_used": 202247, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.562262535095215, + "objective/train/original_loss": 4.562261581420898, + "objective/train/theoretical_loss": 4.0900385191913164, + "objective/train/tokens_used": 357970400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2414817214012146, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499873161315918, + "objective/train/weighted_lm_loss": 4.790578365325928, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.959564745426178, + "theoretical_loss": 4.0900385191913164, + "tokens_seen": 337510400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004441922058267121, + "loss": 4.7015, + "theoretical_loss": 4.089855760864484, + "tokens_seen": 337641472 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004440030268634128, + "loss": 4.6724, + "theoretical_loss": 4.0883969559223186, + "tokens_seen": 338690048 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044381384790011355, + "loss": 4.6783, + "theoretical_loss": 4.086943920578378, + "tokens_seen": 339738624 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.48911985754966736, + "objective/train/docs_used": 204260, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.2867255210876465, + "objective/train/original_loss": 4.286725997924805, + "objective/train/theoretical_loss": 4.085496614311752, + "objective/train/tokens_used": 361247200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2409982979297638, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501370429992676, + "objective/train/weighted_lm_loss": 4.502540111541748, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9587234258651733, + "theoretical_loss": 4.085496614311752, + "tokens_seen": 340787200 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044362466893681425, + "loss": 4.7008, + "theoretical_loss": 4.085496614311752, + "tokens_seen": 340787200 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044343548997351494, + "loss": 4.6856, + "theoretical_loss": 4.084054997009675, + "tokens_seen": 341835776 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004432463110102157, + "loss": 4.707, + "theoretical_loss": 4.082619028962182, + "tokens_seen": 342884352 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004430571320469164, + "loss": 4.73, + "theoretical_loss": 4.081188670856844, + "tokens_seen": 343932928 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.4761353135108948, + "objective/train/docs_used": 206338, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.324362754821777, + "objective/train/original_loss": 4.3243632316589355, + "objective/train/theoretical_loss": 4.081010268600093, + "objective/train/tokens_used": 364524000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23528918623924255, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048809289932251, + "objective/train/weighted_lm_loss": 4.536293983459473, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.951631486415863, + "theoretical_loss": 4.081010268600093, + "tokens_seen": 344064000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044286795308361713, + "loss": 4.7097, + "theoretical_loss": 4.079763883773593, + "tokens_seen": 344981504 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004426787741203178, + "loss": 4.7134, + "theoretical_loss": 4.078344629179623, + "tokens_seen": 346030080 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004424895951570185, + "loss": 4.7597, + "theoretical_loss": 4.076930868924384, + "tokens_seen": 347078656 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.466165155172348, + "objective/train/docs_used": 208654, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.435503005981445, + "objective/train/original_loss": 4.435503959655762, + "objective/train/theoretical_loss": 4.076578282916229, + "objective/train/tokens_used": 367800800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23283718526363373, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0477997064590454, + "objective/train/weighted_lm_loss": 4.647334098815918, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9514264464378357, + "theoretical_loss": 4.076578282916229, + "tokens_seen": 347340800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044230041619371926, + "loss": 4.791, + "theoretical_loss": 4.075522565234643, + "tokens_seen": 348127232 + }, + { + "epoch": 0.12, + "learning_rate": 0.00044211123723041996, + "loss": 4.7082, + "theoretical_loss": 4.074119680709633, + "tokens_seen": 349175808 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004419220582671207, + "loss": 4.6896, + "theoretical_loss": 4.072722178316271, + "tokens_seen": 350224384 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.4762899577617645, + "objective/train/docs_used": 210524, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.206195831298828, + "objective/train/original_loss": 4.206194877624512, + "objective/train/theoretical_loss": 4.072199495003675, + "objective/train/tokens_used": 371077600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24041838943958282, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488507747650146, + "objective/train/weighted_lm_loss": 4.409471035003662, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9514844417572021, + "theoretical_loss": 4.072199495003675, + "tokens_seen": 350617600 + }, + { + "epoch": 0.13, + "learning_rate": 0.00044173287930382145, + "loss": 4.7272, + "theoretical_loss": 4.071330021384458, + "tokens_seen": 351272960 + }, + { + "epoch": 0.13, + "learning_rate": 0.00044154370034052214, + "loss": 4.7532, + "theoretical_loss": 4.069943173602451, + "tokens_seen": 352321536 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004413545213772229, + "loss": 4.661, + "theoretical_loss": 4.068561599012304, + "tokens_seen": 353370112 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.4837128520011902, + "objective/train/docs_used": 211739, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.511666774749756, + "objective/train/original_loss": 4.511666297912598, + "objective/train/theoretical_loss": 4.0678727780229575, + "objective/train/tokens_used": 374354400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23984749615192413, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495903491973877, + "objective/train/weighted_lm_loss": 4.7349019050598145, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9533966183662415, + "theoretical_loss": 4.0678727780229575, + "tokens_seen": 353894400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004411653424139236, + "loss": 4.7159, + "theoretical_loss": 4.0671852620053865, + "tokens_seen": 354418688 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004409761634506243, + "loss": 4.6523, + "theoretical_loss": 4.0658141273179655, + "tokens_seen": 355467264 + }, + { + "epoch": 0.13, + "learning_rate": 0.000440786984487325, + "loss": 4.6922, + "theoretical_loss": 4.0644481600268625, + "tokens_seen": 356515840 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.4922027587890625, + "objective/train/docs_used": 213839, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.124780654907227, + "objective/train/original_loss": 4.124780178070068, + "objective/train/theoretical_loss": 4.06359703915628, + "objective/train/tokens_used": 377631200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24517571926116943, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050466537475586, + "objective/train/weighted_lm_loss": 4.333748817443848, + "objective/train/weights_max": 1.0512195825576782, + "objective/train/weights_min": 0.958421528339386, + "theoretical_loss": 4.06359703915628, + "tokens_seen": 357171200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004405978055240257, + "loss": 4.6883, + "theoretical_loss": 4.06308732554517, + "tokens_seen": 357564416 + }, + { + "epoch": 0.13, + "learning_rate": 0.00044040862656072647, + "loss": 4.7466, + "theoretical_loss": 4.061731589618044, + "tokens_seen": 358612992 + }, + { + "epoch": 0.13, + "learning_rate": 0.00044021944759742716, + "loss": 4.7125, + "theoretical_loss": 4.060380918318552, + "tokens_seen": 359661568 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.4845718443393707, + "objective/train/docs_used": 215849, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.4106268882751465, + "objective/train/original_loss": 4.410626411437988, + "objective/train/theoretical_loss": 4.05937121827939, + "objective/train/tokens_used": 380908000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2417263388633728, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496858358383179, + "objective/train/weighted_lm_loss": 4.631262302398682, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9544422030448914, + "theoretical_loss": 4.05937121827939, + "tokens_seen": 360448000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00044003026863412785, + "loss": 4.687, + "theoretical_loss": 4.059035278043591, + "tokens_seen": 360710144 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004398410896708286, + "loss": 4.7187, + "theoretical_loss": 4.057694635509866, + "tokens_seen": 361758720 + }, + { + "epoch": 0.13, + "learning_rate": 0.00043965191070752935, + "loss": 4.7245, + "theoretical_loss": 4.056358957749928, + "tokens_seen": 362807296 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.4675551652908325, + "objective/train/docs_used": 217416, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.687634468078613, + "objective/train/original_loss": 4.687634468078613, + "objective/train/theoretical_loss": 4.055194286696828, + "objective/train/tokens_used": 384184800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23009935021400452, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0479251146316528, + "objective/train/weighted_lm_loss": 4.910377502441406, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9614465832710266, + "theoretical_loss": 4.055194286696828, + "tokens_seen": 363724800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004394627317442301, + "loss": 4.6763, + "theoretical_loss": 4.055028212108276, + "tokens_seen": 363855872 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004392735527809308, + "loss": 4.6411, + "theoretical_loss": 4.053702366237517, + "tokens_seen": 364904448 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004390843738176315, + "loss": 4.6192, + "theoretical_loss": 4.05238138809458, + "tokens_seen": 365953024 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.48896151781082153, + "objective/train/docs_used": 219277, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.551604270935059, + "objective/train/original_loss": 4.551604270935059, + "objective/train/theoretical_loss": 4.051065245936996, + "objective/train/tokens_used": 387461600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24276012182235718, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501302480697632, + "objective/train/weighted_lm_loss": 4.779492378234863, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9522210359573364, + "theoretical_loss": 4.051065245936996, + "tokens_seen": 367001600 + }, + { + "epoch": 0.13, + "learning_rate": 0.00043889519485433223, + "loss": 4.5711, + "theoretical_loss": 4.051065245936996, + "tokens_seen": 367001600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004387060158910329, + "loss": 4.6239, + "theoretical_loss": 4.049753908319223, + "tokens_seen": 368050176 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004385168369277336, + "loss": 4.6925, + "theoretical_loss": 4.048447344089038, + "tokens_seen": 369098752 + }, + { + "epoch": 0.13, + "learning_rate": 0.00043832765796443437, + "loss": 4.6265, + "theoretical_loss": 4.04714552238397, + "tokens_seen": 370147328 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.48688071966171265, + "objective/train/docs_used": 221307, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.563080310821533, + "objective/train/original_loss": 4.563079833984375, + "objective/train/theoretical_loss": 4.0469831266037, + "objective/train/tokens_used": 390738400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24148398637771606, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499156713485718, + "objective/train/weighted_lm_loss": 4.790269374847412, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.9574251174926758, + "theoretical_loss": 4.0469831266037, + "tokens_seen": 370278400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00043813847900113506, + "loss": 4.6133, + "theoretical_loss": 4.045848412627802, + "tokens_seen": 371195904 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004379493000378358, + "loss": 4.6978, + "theoretical_loss": 4.044555984527107, + "tokens_seen": 372244480 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004377601210745365, + "loss": 4.6412, + "theoretical_loss": 4.04326820806785, + "tokens_seen": 373293056 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.4824051558971405, + "objective/train/docs_used": 223312, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.324045658111572, + "objective/train/original_loss": 4.324045658111572, + "objective/train/theoretical_loss": 4.042946987281072, + "objective/train/tokens_used": 394015200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2413923442363739, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494674444198608, + "objective/train/weighted_lm_loss": 4.537526607513428, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.951562762260437, + "theoretical_loss": 4.042946987281072, + "tokens_seen": 373555200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004375709421112372, + "loss": 4.6788, + "theoretical_loss": 4.041985053512038, + "tokens_seen": 374341632 + }, + { + "epoch": 0.13, + "learning_rate": 0.000437381763147938, + "loss": 4.5914, + "theoretical_loss": 4.040706491394406, + "tokens_seen": 375390208 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004371925841846387, + "loss": 4.5749, + "theoretical_loss": 4.0394324925191745, + "tokens_seen": 376438784 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.4723070561885834, + "objective/train/docs_used": 225129, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.304555892944336, + "objective/train/original_loss": 4.304555892944336, + "objective/train/theoretical_loss": 4.038955913488913, + "objective/train/tokens_used": 397292000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23399347066879272, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484198331832886, + "objective/train/weighted_lm_loss": 4.51221227645874, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9514410495758057, + "theoretical_loss": 4.038955913488913, + "tokens_seen": 376832000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00043700340522133944, + "loss": 4.5628, + "theoretical_loss": 4.038163027956834, + "tokens_seen": 377487360 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043681422625804013, + "loss": 4.5568, + "theoretical_loss": 4.036898069040989, + "tokens_seen": 378535936 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004366250472947408, + "loss": 4.5313, + "theoretical_loss": 4.03563758736524, + "tokens_seen": 379584512 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.4890446662902832, + "objective/train/docs_used": 226852, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.509145259857178, + "objective/train/original_loss": 4.5091447830200195, + "objective/train/theoretical_loss": 4.035009016685741, + "objective/train/tokens_used": 400568800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24364469945430756, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501431226730347, + "objective/train/weighted_lm_loss": 4.735697269439697, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9592508673667908, + "theoretical_loss": 4.035009016685741, + "tokens_seen": 380108800 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043643586833144157, + "loss": 4.6097, + "theoretical_loss": 4.034381554780124, + "tokens_seen": 380633088 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043624668936814227, + "loss": 4.5698, + "theoretical_loss": 4.033129943390076, + "tokens_seen": 381681664 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043605751040484296, + "loss": 4.6048, + "theoretical_loss": 4.031882725550463, + "tokens_seen": 382730240 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.47916504740715027, + "objective/train/docs_used": 228664, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.19334602355957, + "objective/train/original_loss": 4.19334602355957, + "objective/train/theoretical_loss": 4.031105433316977, + "objective/train/tokens_used": 403845600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23511439561843872, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491114854812622, + "objective/train/weighted_lm_loss": 4.397870063781738, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9654620289802551, + "theoretical_loss": 4.031105433316977, + "tokens_seen": 383385600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004358683314415437, + "loss": 4.6059, + "theoretical_loss": 4.030639873864638, + "tokens_seen": 383778816 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004356791524782444, + "loss": 4.4924, + "theoretical_loss": 4.029401361181049, + "tokens_seen": 384827392 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043548997351494515, + "loss": 4.5844, + "theoretical_loss": 4.028167160590383, + "tokens_seen": 385875968 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.48542872071266174, + "objective/train/docs_used": 230356, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9225900173187256, + "objective/train/original_loss": 3.922590732574463, + "objective/train/theoretical_loss": 4.027244323905839, + "objective/train/tokens_used": 407122400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24344097077846527, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497803688049316, + "objective/train/weighted_lm_loss": 4.117582321166992, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9526301622390747, + "theoretical_loss": 4.027244323905839, + "tokens_seen": 386662400 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043530079455164584, + "loss": 4.5285, + "theoretical_loss": 4.026937245422756, + "tokens_seen": 386924544 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004351116155883466, + "loss": 4.4802, + "theoretical_loss": 4.025711589244939, + "tokens_seen": 387973120 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043492243662504734, + "loss": 4.5097, + "theoretical_loss": 4.024490165857627, + "tokens_seen": 389021696 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.4711974859237671, + "objective/train/docs_used": 231398, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.248833656311035, + "objective/train/original_loss": 4.248834133148193, + "objective/train/theoretical_loss": 4.0234248721847035, + "objective/train/tokens_used": 410399200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23104798793792725, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048293948173523, + "objective/train/weighted_lm_loss": 4.452199935913086, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9518858790397644, + "theoretical_loss": 4.0234248721847035, + "tokens_seen": 389939200 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043473325766174803, + "loss": 4.5464, + "theoretical_loss": 4.023272949292743, + "tokens_seen": 390070272 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004345440786984488, + "loss": 4.5371, + "theoretical_loss": 4.022059913810782, + "tokens_seen": 391118848 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043435489973514947, + "loss": 4.5544, + "theoretical_loss": 4.020851033898196, + "tokens_seen": 392167424 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.47954094409942627, + "objective/train/docs_used": 233309, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.41207218170166, + "objective/train/original_loss": 4.412071228027344, + "objective/train/theoretical_loss": 4.019646284264807, + "objective/train/tokens_used": 413676000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23941875994205475, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491708517074585, + "objective/train/weighted_lm_loss": 4.62913703918457, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9515848755836487, + "theoretical_loss": 4.019646284264807, + "tokens_seen": 393216000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043416572077185016, + "loss": 4.6125, + "theoretical_loss": 4.019646284264807, + "tokens_seen": 393216000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004339765418085509, + "loss": 4.6338, + "theoretical_loss": 4.01844563984127, + "tokens_seen": 394264576 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004337873628452516, + "loss": 4.6127, + "theoretical_loss": 4.0172490757765535, + "tokens_seen": 395313152 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004335981838819523, + "loss": 4.632, + "theoretical_loss": 4.016056567435475, + "tokens_seen": 396361728 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.48889124393463135, + "objective/train/docs_used": 234734, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.166934967041016, + "objective/train/original_loss": 4.166934967041016, + "objective/train/theoretical_loss": 4.0159077878422815, + "objective/train/tokens_used": 416952800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2409810870885849, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501141548156738, + "objective/train/weighted_lm_loss": 4.375463962554932, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9956961274147034, + "theoretical_loss": 4.0159077878422815, + "tokens_seen": 396492800 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043340900491865305, + "loss": 4.5567, + "theoretical_loss": 4.014868090396256, + "tokens_seen": 397410304 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043321982595535374, + "loss": 4.4485, + "theoretical_loss": 4.013683620448113, + "tokens_seen": 398458880 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004330306469920545, + "loss": 4.5947, + "theoretical_loss": 4.0125031335888925, + "tokens_seen": 399507456 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.476698637008667, + "objective/train/docs_used": 236668, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.221770286560059, + "objective/train/original_loss": 4.221770763397217, + "objective/train/theoretical_loss": 4.0122086314386545, + "objective/train/tokens_used": 420229600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23127447068691254, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488451719284058, + "objective/train/weighted_lm_loss": 4.427043914794922, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9582767486572266, + "theoretical_loss": 4.0122086314386545, + "tokens_seen": 399769600 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043284146802875524, + "loss": 4.554, + "theoretical_loss": 4.0113266060227275, + "tokens_seen": 400556032 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043265228906545593, + "loss": 4.5778, + "theoretical_loss": 4.010154014157727, + "tokens_seen": 401604608 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004324631101021567, + "loss": 4.5306, + "theoretical_loss": 4.008985334603709, + "tokens_seen": 402653184 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.4728894829750061, + "objective/train/docs_used": 238677, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.1277618408203125, + "objective/train/original_loss": 4.1277618408203125, + "objective/train/theoretical_loss": 4.00854808367405, + "objective/train/tokens_used": 423506400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23286591470241547, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048472285270691, + "objective/train/weighted_lm_loss": 4.3288140296936035, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9518867135047913, + "theoretical_loss": 4.00854808367405, + "tokens_seen": 403046400 + }, + { + "epoch": 0.14, + "learning_rate": 0.00043227393113885737, + "loss": 4.541, + "theoretical_loss": 4.007820544169944, + "tokens_seen": 403701760 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004320847521755581, + "loss": 4.5066, + "theoretical_loss": 4.006659619862954, + "tokens_seen": 404750336 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004318955732122588, + "loss": 4.5251, + "theoretical_loss": 4.0055025388843175, + "tokens_seen": 405798912 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.4813375473022461, + "objective/train/docs_used": 240483, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.646761417388916, + "objective/train/original_loss": 4.646761894226074, + "objective/train/theoretical_loss": 4.004925432571433, + "objective/train/tokens_used": 426783200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23505516350269318, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493284463882446, + "objective/train/weighted_lm_loss": 4.875973224639893, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9840477705001831, + "theoretical_loss": 4.004925432571433, + "tokens_seen": 406323200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004317063942489595, + "loss": 4.5157, + "theoretical_loss": 4.004349278628525, + "tokens_seen": 406847488 + }, + { + "epoch": 0.15, + "learning_rate": 0.00043151721528566025, + "loss": 4.6079, + "theoretical_loss": 4.00319981668085, + "tokens_seen": 407896064 + }, + { + "epoch": 0.15, + "learning_rate": 0.00043132803632236095, + "loss": 4.617, + "theoretical_loss": 4.002054130815253, + "tokens_seen": 408944640 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.4804536998271942, + "objective/train/docs_used": 242341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.573429107666016, + "objective/train/original_loss": 4.573429107666016, + "objective/train/theoretical_loss": 4.0013399848903175, + "objective/train/tokens_used": 430060000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23860077559947968, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492582321166992, + "objective/train/weighted_lm_loss": 4.798383712768555, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.9523076415061951, + "theoretical_loss": 4.0013399848903175, + "tokens_seen": 409600000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00043113885735906164, + "loss": 4.6468, + "theoretical_loss": 4.000912198992316, + "tokens_seen": 409993216 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004309496783957624, + "loss": 4.5594, + "theoretical_loss": 3.9997739993572035, + "tokens_seen": 411041792 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004307604994324631, + "loss": 4.6211, + "theoretical_loss": 3.9986395102376453, + "tokens_seen": 412090368 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.480744332075119, + "objective/train/docs_used": 244062, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.4469990730285645, + "objective/train/original_loss": 4.446999549865723, + "objective/train/theoretical_loss": 3.997791065488486, + "objective/train/tokens_used": 433336800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2378513216972351, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492832660675049, + "objective/train/weighted_lm_loss": 4.665361404418945, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9535585641860962, + "theoretical_loss": 3.997791065488486, + "tokens_seen": 412876800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004305713204691639, + "loss": 4.6416, + "theoretical_loss": 3.99750871014196, + "tokens_seen": 413138944 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004303821415058646, + "loss": 4.5362, + "theoretical_loss": 3.9963815777570897, + "tokens_seen": 414187520 + }, + { + "epoch": 0.15, + "learning_rate": 0.00043019296254256527, + "loss": 4.5415, + "theoretical_loss": 3.99525809194667, + "tokens_seen": 415236096 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.48422208428382874, + "objective/train/docs_used": 245710, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.429144382476807, + "objective/train/original_loss": 4.429144859313965, + "objective/train/theoretical_loss": 3.9942780167103145, + "objective/train/tokens_used": 436613600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23887021839618683, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496364831924438, + "objective/train/weighted_lm_loss": 4.65024995803833, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9516666531562805, + "theoretical_loss": 3.9942780167103145, + "tokens_seen": 416153600 + }, + { + "epoch": 0.15, + "learning_rate": 0.000430003783579266, + "loss": 4.4764, + "theoretical_loss": 3.9941382317491225, + "tokens_seen": 416284672 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004298146046159667, + "loss": 4.5263, + "theoretical_loss": 3.9930219763757755, + "tokens_seen": 417333248 + }, + { + "epoch": 0.15, + "learning_rate": 0.00042962542565266746, + "loss": 4.5673, + "theoretical_loss": 3.9919093052090058, + "tokens_seen": 418381824 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.4735833406448364, + "objective/train/docs_used": 247828, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.22092866897583, + "objective/train/original_loss": 4.220929145812988, + "objective/train/theoretical_loss": 3.9908001978004064, + "objective/train/tokens_used": 439890400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23547500371932983, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485553741455078, + "objective/train/weighted_lm_loss": 4.430475234985352, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9541728496551514, + "theoretical_loss": 3.9908001978004064, + "tokens_seen": 419430400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00042943624668936815, + "loss": 4.5733, + "theoretical_loss": 3.9908001978004064, + "tokens_seen": 419430400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00042924706772606884, + "loss": 4.5431, + "theoretical_loss": 3.989694633868981, + "tokens_seen": 420478976 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004290578887627696, + "loss": 4.5784, + "theoretical_loss": 3.988592593299358, + "tokens_seen": 421527552 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004288687097994703, + "loss": 4.4724, + "theoretical_loss": 3.9874940561400294, + "tokens_seen": 422576128 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.47702404856681824, + "objective/train/docs_used": 249572, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.135619163513184, + "objective/train/original_loss": 4.135619163513184, + "objective/train/theoretical_loss": 3.9873569843412913, + "objective/train/tokens_used": 443167200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2411937266588211, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048928141593933, + "objective/train/weighted_lm_loss": 4.336599826812744, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9514898061752319, + "theoretical_loss": 3.9873569843412913, + "tokens_seen": 422707200 + }, + { + "epoch": 0.15, + "learning_rate": 0.000428679530836171, + "loss": 4.6252, + "theoretical_loss": 3.986399002601617, + "tokens_seen": 423624704 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004284903518728717, + "loss": 4.5332, + "theoretical_loss": 3.9853074130551542, + "tokens_seen": 424673280 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004283011729095725, + "loss": 4.4783, + "theoretical_loss": 3.984219268030392, + "tokens_seen": 425721856 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.4593813717365265, + "objective/train/docs_used": 251147, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.276764392852783, + "objective/train/original_loss": 4.276763916015625, + "objective/train/theoretical_loss": 3.9839477677140245, + "objective/train/tokens_used": 446444000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23159852623939514, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0471152067184448, + "objective/train/weighted_lm_loss": 4.481888771057129, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9540482759475708, + "theoretical_loss": 3.9839477677140245, + "tokens_seen": 425984000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004281119939462732, + "loss": 4.5639, + "theoretical_loss": 3.983134548214133, + "tokens_seen": 426770432 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004279228149829739, + "loss": 4.5079, + "theoretical_loss": 3.98205323444858, + "tokens_seen": 427819008 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004277336360196746, + "loss": 4.4224, + "theoretical_loss": 3.9809753077297074, + "tokens_seen": 428867584 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.47926458716392517, + "objective/train/docs_used": 253081, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.053443431854248, + "objective/train/original_loss": 4.053443908691406, + "objective/train/theoretical_loss": 3.98057195458059, + "objective/train/tokens_used": 449720800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23603300750255585, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049126148223877, + "objective/train/weighted_lm_loss": 4.253237247467041, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9541853070259094, + "theoretical_loss": 3.98057195458059, + "tokens_seen": 429260800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00042754445705637536, + "loss": 4.5223, + "theoretical_loss": 3.979900749205657, + "tokens_seen": 429916160 + }, + { + "epoch": 0.15, + "learning_rate": 0.00042735527809307605, + "loss": 4.4297, + "theoretical_loss": 3.9788295401751483, + "tokens_seen": 430964736 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004271660991297768, + "loss": 4.4719, + "theoretical_loss": 3.9777616620859186, + "tokens_seen": 432013312 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.4837666153907776, + "objective/train/docs_used": 255033, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.001176357269287, + "objective/train/original_loss": 4.001176834106445, + "objective/train/theoretical_loss": 3.9772289663870657, + "objective/train/tokens_used": 452997600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23786011338233948, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495855808258057, + "objective/train/weighted_lm_loss": 4.199849605560303, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9561724066734314, + "theoretical_loss": 3.9772289663870657, + "tokens_seen": 432537600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004269769201664775, + "loss": 4.4716, + "theoretical_loss": 3.976697096533171, + "tokens_seen": 433061888 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004267877412031782, + "loss": 4.4514, + "theoretical_loss": 3.975635825258053, + "tokens_seen": 434110464 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042659856223987893, + "loss": 4.448, + "theoretical_loss": 3.9745778301461483, + "tokens_seen": 435159040 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.4698527157306671, + "objective/train/docs_used": 256972, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.00714635848999, + "objective/train/original_loss": 4.007145881652832, + "objective/train/theoretical_loss": 3.9739182388865606, + "objective/train/tokens_used": 456274400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22894169390201569, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481488704681396, + "objective/train/weighted_lm_loss": 4.203254699707031, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9517961740493774, + "theoretical_loss": 3.9739182388865606, + "tokens_seen": 435814400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004264093832765796, + "loss": 4.4264, + "theoretical_loss": 3.9735230932259893, + "tokens_seen": 436207616 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004262202043132803, + "loss": 4.5008, + "theoretical_loss": 3.9724715966675896, + "tokens_seen": 437256192 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004260310253499811, + "loss": 4.4519, + "theoretical_loss": 3.9714233227809936, + "tokens_seen": 438304768 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.4772937297821045, + "objective/train/docs_used": 259026, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.386899948120117, + "objective/train/original_loss": 4.386899948120117, + "objective/train/theoretical_loss": 3.9706392216810085, + "objective/train/tokens_used": 459551200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24028657376766205, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048950433731079, + "objective/train/weighted_lm_loss": 4.601118564605713, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9514262080192566, + "theoretical_loss": 3.9706392216810085, + "tokens_seen": 439091200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004258418463866818, + "loss": 4.4169, + "theoretical_loss": 3.970378254014844, + "tokens_seen": 439353344 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042565266742338256, + "loss": 4.4168, + "theoretical_loss": 3.96933637295497, + "tokens_seen": 440401920 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042546348846008326, + "loss": 4.4585, + "theoretical_loss": 3.96829766232299, + "tokens_seen": 441450496 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.4840179681777954, + "objective/train/docs_used": 261126, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.244239330291748, + "objective/train/original_loss": 4.244239330291748, + "objective/train/theoretical_loss": 3.9673913777809253, + "objective/train/tokens_used": 462828000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24010293185710907, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496221780776978, + "objective/train/weighted_lm_loss": 4.457010746002197, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9591333270072937, + "theoretical_loss": 3.9673913777809253, + "tokens_seen": 442368000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042527430949678395, + "loss": 4.4311, + "theoretical_loss": 3.9672621049749335, + "tokens_seen": 442499072 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004250851305334847, + "loss": 4.4316, + "theoretical_loss": 3.96622968389988, + "tokens_seen": 443547648 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004248959515701854, + "loss": 4.3984, + "theoretical_loss": 3.9652003822186166, + "tokens_seen": 444596224 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.4815636873245239, + "objective/train/docs_used": 262792, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.1629133224487305, + "objective/train/original_loss": 4.1629133224487305, + "objective/train/theoretical_loss": 3.9641741831823065, + "objective/train/tokens_used": 466104800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24196134507656097, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493861436843872, + "objective/train/weighted_lm_loss": 4.367969989776611, + "objective/train/weights_max": 1.0512193441390991, + "objective/train/weights_min": 0.9516691565513611, + "theoretical_loss": 3.9641741831823065, + "tokens_seen": 445644800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042470677260688614, + "loss": 4.48, + "theoretical_loss": 3.9641741831823065, + "tokens_seen": 445644800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042451759364358683, + "loss": 4.4281, + "theoretical_loss": 3.9631510701711816, + "tokens_seen": 446693376 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004243284146802875, + "loss": 4.4256, + "theoretical_loss": 3.9621310266932457, + "tokens_seen": 447741952 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042413923571698827, + "loss": 4.3659, + "theoretical_loss": 3.9611140363829977, + "tokens_seen": 448790528 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.481901079416275, + "objective/train/docs_used": 264102, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.1385955810546875, + "objective/train/original_loss": 4.138594627380371, + "objective/train/theoretical_loss": 3.960987126459872, + "objective/train/tokens_used": 469381600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2416316270828247, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494182109832764, + "objective/train/weighted_lm_loss": 4.344489097595215, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9545145630836487, + "theoretical_loss": 3.960987126459872, + "tokens_seen": 448921600 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042395005675368897, + "loss": 4.4184, + "theoretical_loss": 3.9601000830001665, + "tokens_seen": 449839104 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042376087779038977, + "loss": 4.3922, + "theoretical_loss": 3.9590891504284635, + "tokens_seen": 450887680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042357169882709046, + "loss": 4.2944, + "theoretical_loss": 3.9580812226743523, + "tokens_seen": 451936256 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.490369588136673, + "objective/train/docs_used": 265980, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.172577381134033, + "objective/train/original_loss": 4.172577857971191, + "objective/train/theoretical_loss": 3.9578297083759195, + "objective/train/tokens_used": 472658400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2438024878501892, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050276279449463, + "objective/train/weighted_lm_loss": 4.382692337036133, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9516772627830505, + "theoretical_loss": 3.9578297083759195, + "tokens_seen": 452198400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042338251986379115, + "loss": 4.3378, + "theoretical_loss": 3.95707628386583, + "tokens_seen": 452984832 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004231933409004919, + "loss": 4.4524, + "theoretical_loss": 3.9560743182512255, + "tokens_seen": 454033408 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004230041619371926, + "loss": 4.3017, + "theoretical_loss": 3.9550753101980103, + "tokens_seen": 455081984 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.47568219900131226, + "objective/train/docs_used": 267725, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.0070085525512695, + "objective/train/original_loss": 4.0070085525512695, + "objective/train/theoretical_loss": 3.954701441504068, + "objective/train/tokens_used": 475935200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23760542273521423, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487757921218872, + "objective/train/weighted_lm_loss": 4.202053546905518, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9515478014945984, + "theoretical_loss": 3.954701441504068, + "tokens_seen": 455475200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004228149829738933, + "loss": 4.3259, + "theoretical_loss": 3.954079244191628, + "tokens_seen": 456130560 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042262580401059404, + "loss": 4.3595, + "theoretical_loss": 3.953086104834334, + "tokens_seen": 457179136 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042243662504729473, + "loss": 4.3055, + "theoretical_loss": 3.9520958768440484, + "tokens_seen": 458227712 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.47381123900413513, + "objective/train/docs_used": 269663, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.464710235595703, + "objective/train/original_loss": 4.464710235595703, + "objective/train/theoretical_loss": 3.951601849867233, + "objective/train/tokens_used": 479212000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23545365035533905, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485777854919434, + "objective/train/weighted_lm_loss": 4.680891036987305, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9522637724876404, + "theoretical_loss": 3.951601849867233, + "tokens_seen": 458752000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004222474460839955, + "loss": 4.2918, + "theoretical_loss": 3.951108545053229, + "tokens_seen": 459276288 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042205826712069617, + "loss": 4.3763, + "theoretical_loss": 3.9501240944077494, + "tokens_seen": 460324864 + }, + { + "epoch": 0.16, + "learning_rate": 0.00042186908815739686, + "loss": 4.3612, + "theoretical_loss": 3.949142509965799, + "tokens_seen": 461373440 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.48686516284942627, + "objective/train/docs_used": 271706, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.096804141998291, + "objective/train/original_loss": 4.096804618835449, + "objective/train/theoretical_loss": 3.948530468589195, + "objective/train/tokens_used": 482488800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23930074274539948, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499029159545898, + "objective/train/weighted_lm_loss": 4.302978515625, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9692782759666443, + "theoretical_loss": 3.948530468589195, + "tokens_seen": 462028800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004216799091940976, + "loss": 4.3302, + "theoretical_loss": 3.9481637768967883, + "tokens_seen": 462422016 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042149073023079836, + "loss": 4.3089, + "theoretical_loss": 3.9471878804802736, + "tokens_seen": 463470592 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004213015512674991, + "loss": 4.3075, + "theoretical_loss": 3.9462148061048907, + "tokens_seen": 464519168 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.48215600848197937, + "objective/train/docs_used": 273436, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.740396738052368, + "objective/train/original_loss": 3.740396499633789, + "objective/train/theoretical_loss": 3.9454868435591504, + "objective/train/tokens_used": 485765600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2377597689628601, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494240522384644, + "objective/train/weighted_lm_loss": 3.9267635345458984, + "objective/train/weights_max": 1.0512155294418335, + "objective/train/weights_min": 0.951538622379303, + "theoretical_loss": 3.9454868435591504, + "tokens_seen": 465305600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004211123723041998, + "loss": 4.3019, + "theoretical_loss": 3.945244539267303, + "tokens_seen": 465567744 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004209231933409005, + "loss": 4.2432, + "theoretical_loss": 3.9442770655711614, + "tokens_seen": 466616320 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042073401437760124, + "loss": 4.2614, + "theoretical_loss": 3.9433123707260775, + "tokens_seen": 467664896 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.4793383777141571, + "objective/train/docs_used": 275552, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.101151943206787, + "objective/train/original_loss": 4.101151943206787, + "objective/train/theoretical_loss": 3.9424705311086856, + "objective/train/tokens_used": 489042400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2382468432188034, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491446256637573, + "objective/train/weighted_lm_loss": 4.304407119750977, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9532943367958069, + "theoretical_loss": 3.9424705311086856, + "tokens_seen": 468582400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042054483541430194, + "loss": 4.2677, + "theoretical_loss": 3.9423504405466074, + "tokens_seen": 468713472 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042035565645100263, + "loss": 4.1909, + "theoretical_loss": 3.9413912609512485, + "tokens_seen": 469762048 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004201664774877034, + "loss": 4.3726, + "theoretical_loss": 3.940434817961448, + "tokens_seen": 470810624 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.4858047068119049, + "objective/train/docs_used": 277578, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.086038112640381, + "objective/train/original_loss": 4.086038112640381, + "objective/train/theoretical_loss": 3.939481097700623, + "objective/train/tokens_used": 492319200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2407042235136032, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498039722442627, + "objective/train/weighted_lm_loss": 4.289696216583252, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9598610401153564, + "theoretical_loss": 3.939481097700623, + "tokens_seen": 471859200 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041997729852440407, + "loss": 4.3418, + "theoretical_loss": 3.939481097700623, + "tokens_seen": 471859200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004197881195611048, + "loss": 4.2716, + "theoretical_loss": 3.9385300863931914, + "tokens_seen": 472907776 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004195989405978055, + "loss": 4.3335, + "theoretical_loss": 3.9375817703636167, + "tokens_seen": 473956352 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004194097616345062, + "loss": 4.2623, + "theoretical_loss": 3.9366361360354585, + "tokens_seen": 475004928 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.4816089868545532, + "objective/train/docs_used": 279357, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.476291656494141, + "objective/train/original_loss": 4.476291656494141, + "objective/train/theoretical_loss": 3.936518119629225, + "objective/train/tokens_used": 495596000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2425791174173355, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049393892288208, + "objective/train/weighted_lm_loss": 4.69778299331665, + "objective/train/weights_max": 1.0512197017669678, + "objective/train/weights_min": 0.9523156881332397, + "theoretical_loss": 3.936518119629225, + "tokens_seen": 475136000 + }, + { + "epoch": 0.17, + "learning_rate": 0.000419220582671207, + "loss": 4.2778, + "theoretical_loss": 3.9356931699304427, + "tokens_seen": 476053504 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004190314037079077, + "loss": 4.3016, + "theoretical_loss": 3.9347528586675304, + "tokens_seen": 477102080 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041884222474460845, + "loss": 4.3464, + "theoretical_loss": 3.9338151889620114, + "tokens_seen": 478150656 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.4777883291244507, + "objective/train/docs_used": 280385, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.414139270782471, + "objective/train/original_loss": 4.4141387939453125, + "objective/train/theoretical_loss": 3.933581182731271, + "objective/train/tokens_used": 498872800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2336207926273346, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489662885665894, + "objective/train/weighted_lm_loss": 4.631285190582275, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9555152058601379, + "theoretical_loss": 3.933581182731271, + "tokens_seen": 478412800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041865304578130914, + "loss": 4.3909, + "theoretical_loss": 3.9328801476245987, + "tokens_seen": 479199232 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041846386681800983, + "loss": 4.3807, + "theoretical_loss": 3.9319477215605323, + "tokens_seen": 480247808 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004182746878547106, + "loss": 4.3472, + "theoretical_loss": 3.9310178977687045, + "tokens_seen": 481296384 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.48086094856262207, + "objective/train/docs_used": 282096, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.888185977935791, + "objective/train/original_loss": 3.888186454772949, + "objective/train/theoretical_loss": 3.930669882107529, + "objective/train/tokens_used": 502149600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23717832565307617, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049291729927063, + "objective/train/weighted_lm_loss": 4.0802998542785645, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9538412094116211, + "theoretical_loss": 3.930669882107529, + "tokens_seen": 481689600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004180855088914113, + "loss": 4.2477, + "theoretical_loss": 3.930090663340782, + "tokens_seen": 482344960 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041789632992811197, + "loss": 4.267, + "theoretical_loss": 3.9291660054603454, + "tokens_seen": 483393536 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004177071509648127, + "loss": 4.2292, + "theoretical_loss": 3.9282439114020375, + "tokens_seen": 484442112 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.48390454053878784, + "objective/train/docs_used": 284179, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.326482772827148, + "objective/train/original_loss": 4.326481819152832, + "objective/train/theoretical_loss": 3.927783821854201, + "objective/train/tokens_used": 505426400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2400244176387787, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496103763580322, + "objective/train/weighted_lm_loss": 4.54107666015625, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9513835310935974, + "theoretical_loss": 3.927783821854201, + "tokens_seen": 484966400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004175179720015134, + "loss": 4.2245, + "theoretical_loss": 3.927324368530723, + "tokens_seen": 485490688 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041732879303821416, + "loss": 4.1996, + "theoretical_loss": 3.926407364300649, + "tokens_seen": 486539264 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041713961407491485, + "loss": 4.2332, + "theoretical_loss": 3.9254928862546303, + "tokens_seen": 487587840 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.4841022193431854, + "objective/train/docs_used": 285838, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9023804664611816, + "objective/train/original_loss": 3.90238094329834, + "objective/train/theoretical_loss": 3.9249226148038927, + "objective/train/tokens_used": 508703200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2402951419353485, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496317148208618, + "objective/train/weighted_lm_loss": 4.096041202545166, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9679876565933228, + "theoretical_loss": 3.9249226148038927, + "tokens_seen": 488243200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004169504351116156, + "loss": 4.2304, + "theoretical_loss": 3.9245809220232295, + "tokens_seen": 488636416 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041676125614831635, + "loss": 4.2336, + "theoretical_loss": 3.923671459323953, + "tokens_seen": 489684992 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041657207718501704, + "loss": 4.1907, + "theoretical_loss": 3.9227644859604562, + "tokens_seen": 490733568 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.49535855650901794, + "objective/train/docs_used": 287192, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.264978885650635, + "objective/train/original_loss": 4.264978408813477, + "objective/train/theoretical_loss": 3.9220858822757396, + "objective/train/tokens_used": 511980000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24591341614723206, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050786018371582, + "objective/train/weighted_lm_loss": 4.481633186340332, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9592586159706116, + "theoretical_loss": 3.9220858822757396, + "tokens_seen": 491520000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004163828982217178, + "loss": 4.234, + "theoretical_loss": 3.9218599898217583, + "tokens_seen": 491782144 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004161937192584185, + "loss": 4.2348, + "theoretical_loss": 3.92095795888146, + "tokens_seen": 492830720 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004160045402951192, + "loss": 4.2251, + "theoretical_loss": 3.9200583811969785, + "tokens_seen": 493879296 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.4876641631126404, + "objective/train/docs_used": 289259, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9813899993896484, + "objective/train/original_loss": 3.9813899993896484, + "objective/train/theoretical_loss": 3.9192732538342785, + "objective/train/tokens_used": 515256800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24164560437202454, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049994945526123, + "objective/train/weighted_lm_loss": 4.1802239418029785, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9596173763275146, + "theoretical_loss": 3.9192732538342785, + "tokens_seen": 494796800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004158153613318199, + "loss": 4.2636, + "theoretical_loss": 3.919161244908785, + "tokens_seen": 494927872 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004156261823685206, + "loss": 4.2658, + "theoretical_loss": 3.918266538239653, + "tokens_seen": 495976448 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041543700340522136, + "loss": 4.2883, + "theoretical_loss": 3.917374249493913, + "tokens_seen": 497025024 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.4695127606391907, + "objective/train/docs_used": 291398, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.011775970458984, + "objective/train/original_loss": 4.011776447296143, + "objective/train/theoretical_loss": 3.9164843670567215, + "objective/train/tokens_used": 518533600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23077881336212158, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048124074935913, + "objective/train/weighted_lm_loss": 4.204472064971924, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9517326354980469, + "theoretical_loss": 3.9164843670567215, + "tokens_seen": 498073600 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041524782444192206, + "loss": 4.2508, + "theoretical_loss": 3.9164843670567215, + "tokens_seen": 498073600 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041505864547862275, + "loss": 4.255, + "theoretical_loss": 3.9155968793933273, + "tokens_seen": 499122176 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004148694665153235, + "loss": 4.2992, + "theoretical_loss": 3.9147117750483584, + "tokens_seen": 500170752 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041468028755202425, + "loss": 4.2444, + "theoretical_loss": 3.913829042645107, + "tokens_seen": 501219328 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.46639207005500793, + "objective/train/docs_used": 293236, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7727935314178467, + "objective/train/original_loss": 3.772793769836426, + "objective/train/theoretical_loss": 3.913718867308278, + "objective/train/tokens_used": 521810400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23303526639938354, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.047823190689087, + "objective/train/weighted_lm_loss": 3.950061559677124, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9522370100021362, + "theoretical_loss": 3.913718867308278, + "tokens_seen": 501350400 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041449110858872494, + "loss": 4.1459, + "theoretical_loss": 3.912948670884827, + "tokens_seen": 502267904 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004143019296254257, + "loss": 4.1465, + "theoretical_loss": 3.912070648546038, + "tokens_seen": 503316480 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004141127506621264, + "loss": 4.1508, + "theoretical_loss": 3.9111949644838386, + "tokens_seen": 504365056 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.4696812927722931, + "objective/train/docs_used": 295292, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.771225690841675, + "objective/train/original_loss": 3.7712254524230957, + "objective/train/theoretical_loss": 3.910976407525199, + "objective/train/tokens_used": 525087200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23411457240581512, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481582880020142, + "objective/train/weighted_lm_loss": 3.9577152729034424, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.95467609167099, + "theoretical_loss": 3.910976407525199, + "tokens_seen": 504627200 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041392357169882713, + "loss": 4.1547, + "theoretical_loss": 3.910321607629225, + "tokens_seen": 505413632 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004137343927355278, + "loss": 4.1705, + "theoretical_loss": 3.9094505669884168, + "tokens_seen": 506462208 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004135452137722285, + "loss": 4.1505, + "theoretical_loss": 3.9085818316421945, + "tokens_seen": 507510784 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.4871479272842407, + "objective/train/docs_used": 297338, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9234955310821533, + "objective/train/original_loss": 3.923495292663574, + "objective/train/theoretical_loss": 3.9082566480052314, + "objective/train/tokens_used": 528364000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23970387876033783, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499333143234253, + "objective/train/weighted_lm_loss": 4.120626449584961, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9618850350379944, + "theoretical_loss": 3.9082566480052314, + "tokens_seen": 507904000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041335603480892926, + "loss": 4.1253, + "theoretical_loss": 3.9077153907452367, + "tokens_seen": 508559360 + }, + { + "epoch": 0.18, + "learning_rate": 0.00041316685584562996, + "loss": 4.1998, + "theoretical_loss": 3.9068512335254724, + "tokens_seen": 509607936 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004129776768823307, + "loss": 4.145, + "theoretical_loss": 3.905989349283435, + "tokens_seen": 510656512 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.46928656101226807, + "objective/train/docs_used": 299351, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7585744857788086, + "objective/train/original_loss": 3.7585747241973877, + "objective/train/theoretical_loss": 3.9055592562051764, + "objective/train/tokens_used": 531640800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23103711009025574, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481030941009521, + "objective/train/weighted_lm_loss": 3.9365973472595215, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9520386457443237, + "theoretical_loss": 3.9055592562051764, + "tokens_seen": 511180800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004127884979190314, + "loss": 4.1397, + "theoretical_loss": 3.9051297273916257, + "tokens_seen": 511705088 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004125993189557321, + "loss": 4.0981, + "theoretical_loss": 3.9042723572938836, + "tokens_seen": 512753664 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004124101399924329, + "loss": 4.0203, + "theoretical_loss": 3.9034172285047597, + "tokens_seen": 513802240 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.4936114549636841, + "objective/train/docs_used": 301377, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8155317306518555, + "objective/train/original_loss": 3.8155317306518555, + "objective/train/theoretical_loss": 3.9028839065452745, + "objective/train/tokens_used": 534917600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24442671239376068, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506037473678589, + "objective/train/weighted_lm_loss": 4.008247375488281, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9937735795974731, + "theoretical_loss": 3.9028839065452745, + "tokens_seen": 514457600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004122209610291336, + "loss": 4.0773, + "theoretical_loss": 3.902564330608904, + "tokens_seen": 514850816 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004120317820658343, + "loss": 4.0716, + "theoretical_loss": 3.901713653260452, + "tokens_seen": 515899392 + }, + { + "epoch": 0.18, + "learning_rate": 0.000411842603102535, + "loss": 4.114, + "theoretical_loss": 3.900865186182421, + "tokens_seen": 516947968 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.4887354075908661, + "objective/train/docs_used": 303002, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.991806983947754, + "objective/train/original_loss": 3.991806983947754, + "objective/train/theoretical_loss": 3.9002302802201427, + "objective/train/tokens_used": 538194400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.243209108710289, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501097440719604, + "objective/train/weighted_lm_loss": 4.191769123077393, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9525777697563171, + "theoretical_loss": 3.9002302802201427, + "tokens_seen": 517734400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004116534241392357, + "loss": 4.1111, + "theoretical_loss": 3.9000189191661163, + "tokens_seen": 517996544 + }, + { + "epoch": 0.19, + "learning_rate": 0.00041146424517593647, + "loss": 4.0989, + "theoretical_loss": 3.8991748420705363, + "tokens_seen": 519045120 + }, + { + "epoch": 0.19, + "learning_rate": 0.00041127506621263716, + "loss": 4.112, + "theoretical_loss": 3.8983329448217905, + "tokens_seen": 520093696 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.4678727686405182, + "objective/train/docs_used": 304925, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8752501010894775, + "objective/train/original_loss": 3.8752501010894775, + "objective/train/theoretical_loss": 3.8975980650160067, + "objective/train/tokens_used": 541471200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22843264043331146, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0479482412338257, + "objective/train/weighted_lm_loss": 4.060460567474365, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9528185129165649, + "theoretical_loss": 3.8975980650160067, + "tokens_seen": 521011200 + }, + { + "epoch": 0.19, + "learning_rate": 0.00041108588724933785, + "loss": 4.0669, + "theoretical_loss": 3.8974932174125194, + "tokens_seen": 521142272 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004108967082860386, + "loss": 4.147, + "theoretical_loss": 3.896655649901324, + "tokens_seen": 522190848 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004107075293227393, + "loss": 4.117, + "theoretical_loss": 3.8958202324121984, + "tokens_seen": 523239424 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.48702916502952576, + "objective/train/docs_used": 306825, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5543291568756104, + "objective/train/original_loss": 3.5543293952941895, + "objective/train/theoretical_loss": 3.8949869551339704, + "objective/train/tokens_used": 544748000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23956939578056335, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049920678138733, + "objective/train/weighted_lm_loss": 3.730886936187744, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9572703242301941, + "theoretical_loss": 3.8949869551339704, + "tokens_seen": 524288000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00041051835035944004, + "loss": 4.0828, + "theoretical_loss": 3.8949869551339704, + "tokens_seen": 524288000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00041032917139614074, + "loss": 4.0647, + "theoretical_loss": 3.8941558083197467, + "tokens_seen": 525336576 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004101399924328415, + "loss": 4.0718, + "theoretical_loss": 3.8933267822863646, + "tokens_seen": 526385152 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040995081346954223, + "loss": 4.046, + "theoretical_loss": 3.8924998674138487, + "tokens_seen": 527433728 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.4876173734664917, + "objective/train/docs_used": 308042, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9242730140686035, + "objective/train/original_loss": 3.9242730140686035, + "objective/train/theoretical_loss": 3.892396651019104, + "objective/train/tokens_used": 548024800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24128563702106476, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499881505966187, + "objective/train/weighted_lm_loss": 4.1212382316589355, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9571343064308167, + "theoretical_loss": 3.892396651019104, + "tokens_seen": 527564800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004097616345062429, + "loss": 4.1247, + "theoretical_loss": 3.8916750541448764, + "tokens_seen": 528482304 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004095724555429436, + "loss": 4.1589, + "theoretical_loss": 3.890852332984242, + "tokens_seen": 529530880 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040938327657964437, + "loss": 4.1626, + "theoretical_loss": 3.890031694498337, + "tokens_seen": 530579456 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.47788065671920776, + "objective/train/docs_used": 310103, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8699586391448975, + "objective/train/original_loss": 3.8699588775634766, + "objective/train/theoretical_loss": 3.889826859195108, + "objective/train/tokens_used": 551301600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23559047281742096, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048985481262207, + "objective/train/weighted_lm_loss": 4.060449600219727, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9523392915725708, + "theoretical_loss": 3.889826859195108, + "tokens_seen": 530841600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040919409761634506, + "loss": 4.0974, + "theoretical_loss": 3.8892131293146237, + "tokens_seen": 531628032 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004090049186530458, + "loss": 3.9809, + "theoretical_loss": 3.888396628121124, + "tokens_seen": 532676608 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004088157396897465, + "loss": 4.0358, + "theoretical_loss": 3.887582181665909, + "tokens_seen": 533725184 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.4851924479007721, + "objective/train/docs_used": 311341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.86273193359375, + "objective/train/original_loss": 3.862731695175171, + "objective/train/theoretical_loss": 3.887277292104349, + "objective/train/tokens_used": 554578400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24329021573066711, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497558116912842, + "objective/train/weighted_lm_loss": 4.054152965545654, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9514743685722351, + "theoretical_loss": 3.887277292104349, + "tokens_seen": 534118400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004086265607264472, + "loss": 4.0574, + "theoretical_loss": 3.8867697807565937, + "tokens_seen": 534773760 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040843738176314794, + "loss": 4.0136, + "theoretical_loss": 3.8859594162598396, + "tokens_seen": 535822336 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040824820279984864, + "loss": 4.1343, + "theoretical_loss": 3.8851510791008588, + "tokens_seen": 536870912 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.47740688920021057, + "objective/train/docs_used": 313016, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9105546474456787, + "objective/train/original_loss": 3.910554885864258, + "objective/train/theoretical_loss": 3.884747667953053, + "objective/train/tokens_used": 557855200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2320510745048523, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489201545715332, + "objective/train/weighted_lm_loss": 4.102197170257568, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9539920091629028, + "theoretical_loss": 3.884747667953053, + "tokens_seen": 537395200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004080590238365494, + "loss": 4.1085, + "theoretical_loss": 3.8843447602629277, + "tokens_seen": 537919488 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040786984487325013, + "loss": 4.0926, + "theoretical_loss": 3.883540450786901, + "tokens_seen": 538968064 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004076806659099508, + "loss": 4.0265, + "theoretical_loss": 3.8827381417707327, + "tokens_seen": 540016640 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.4817216992378235, + "objective/train/docs_used": 314826, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.573957681655884, + "objective/train/original_loss": 3.573957920074463, + "objective/train/theoretical_loss": 3.8822377105614674, + "objective/train/tokens_used": 561132000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2370116412639618, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493767261505127, + "objective/train/weighted_lm_loss": 3.7496209144592285, + "objective/train/weights_max": 1.0512157678604126, + "objective/train/weights_min": 0.9515175819396973, + "theoretical_loss": 3.8822377105614674, + "tokens_seen": 540672000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040749148694665157, + "loss": 4.0104, + "theoretical_loss": 3.8819378243690044, + "tokens_seen": 541065216 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040730230798335227, + "loss": 4.0048, + "theoretical_loss": 3.881139489792454, + "tokens_seen": 542113792 + }, + { + "epoch": 0.19, + "learning_rate": 0.00040711312902005296, + "loss": 4.0798, + "theoretical_loss": 3.880343129307512, + "tokens_seen": 543162368 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.47069650888442993, + "objective/train/docs_used": 316612, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.242428302764893, + "objective/train/original_loss": 4.242428779602051, + "objective/train/theoretical_loss": 3.8797471492187987, + "objective/train/tokens_used": 564408800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22920586168766022, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0482345819473267, + "objective/train/weighted_lm_loss": 4.447694301605225, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.962023138999939, + "theoretical_loss": 3.8797471492187987, + "tokens_seen": 543948800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004069239500567537, + "loss": 4.1018, + "theoretical_loss": 3.879548734235843, + "tokens_seen": 544210944 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004067347710934544, + "loss": 4.0261, + "theoretical_loss": 3.878756295953889, + "tokens_seen": 545259520 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040654559213015515, + "loss": 4.1229, + "theoretical_loss": 3.87796580589242, + "tokens_seen": 546308096 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.4922097623348236, + "objective/train/docs_used": 318614, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.817476749420166, + "objective/train/original_loss": 3.817476511001587, + "objective/train/theoretical_loss": 3.877275718542742, + "objective/train/tokens_used": 567685600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2433522641658783, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504580736160278, + "objective/train/weighted_lm_loss": 4.011016368865967, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9557735323905945, + "theoretical_loss": 3.877275718542742, + "tokens_seen": 547225600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040635641316685584, + "loss": 4.0408, + "theoretical_loss": 3.877177255536089, + "tokens_seen": 547356672 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040616723420355653, + "loss": 4.0115, + "theoretical_loss": 3.8763906364229888, + "tokens_seen": 548405248 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004059780552402573, + "loss": 3.9969, + "theoretical_loss": 3.875605940144217, + "tokens_seen": 549453824 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.4792472720146179, + "objective/train/docs_used": 320433, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.018838405609131, + "objective/train/original_loss": 4.018837928771973, + "objective/train/theoretical_loss": 3.8748231583434425, + "objective/train/tokens_used": 570962400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24296848475933075, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491595268249512, + "objective/train/weighted_lm_loss": 4.21675968170166, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9513953924179077, + "theoretical_loss": 3.8748231583434425, + "tokens_seen": 550502400 + }, + { + "epoch": 0.2, + "learning_rate": 0.000405788876276958, + "loss": 3.9935, + "theoretical_loss": 3.8748231583434425, + "tokens_seen": 550502400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004055996973136588, + "loss": 3.9662, + "theoretical_loss": 3.8740422827164784, + "tokens_seen": 551550976 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040541051835035947, + "loss": 4.0177, + "theoretical_loss": 3.873263305010858, + "tokens_seen": 552599552 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040522133938706016, + "loss": 4.0032, + "theoretical_loss": 3.872486217025413, + "tokens_seen": 553648128 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.46256640553474426, + "objective/train/docs_used": 322451, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.843613386154175, + "objective/train/original_loss": 3.843613624572754, + "objective/train/theoretical_loss": 3.872389213491709, + "objective/train/tokens_used": 574239200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23105382919311523, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0474309921264648, + "objective/train/weighted_lm_loss": 4.021306037902832, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9536635279655457, + "theoretical_loss": 3.872389213491709, + "tokens_seen": 553779200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004050321604237609, + "loss": 3.9792, + "theoretical_loss": 3.8717110106098627, + "tokens_seen": 554696704 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004048429814604616, + "loss": 4.0535, + "theoretical_loss": 3.870937677664398, + "tokens_seen": 555745280 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040465380249716235, + "loss": 4.0048, + "theoretical_loss": 3.870166210139278, + "tokens_seen": 556793856 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.4924386441707611, + "objective/train/docs_used": 324068, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.743295431137085, + "objective/train/original_loss": 3.743295431137085, + "objective/train/theoretical_loss": 3.869973633791332, + "objective/train/tokens_used": 577516000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24441003799438477, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504863262176514, + "objective/train/weighted_lm_loss": 3.932000160217285, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9550192356109619, + "theoretical_loss": 3.869973633791332, + "tokens_seen": 557056000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040446462353386305, + "loss": 4.0371, + "theoretical_loss": 3.8693966000344253, + "tokens_seen": 557842432 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040427544457056374, + "loss": 4.0484, + "theoretical_loss": 3.868628839399026, + "tokens_seen": 558891008 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004040862656072645, + "loss": 4.0689, + "theoretical_loss": 3.8678629203311368, + "tokens_seen": 559939584 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.48858821392059326, + "objective/train/docs_used": 325860, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9442625045776367, + "objective/train/original_loss": 3.9442625045776367, + "objective/train/theoretical_loss": 3.8675761738553596, + "objective/train/tokens_used": 580792800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2418859302997589, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500881671905518, + "objective/train/weighted_lm_loss": 4.1417741775512695, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9529458284378052, + "theoretical_loss": 3.8675761738553596, + "tokens_seen": 560332800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004038970866439652, + "loss": 4.0605, + "theoretical_loss": 3.8670988349772912, + "tokens_seen": 560988160 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004037079076806659, + "loss": 3.9893, + "theoretical_loss": 3.8663365755321157, + "tokens_seen": 562036736 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004035187287173666, + "loss": 4.0399, + "theoretical_loss": 3.865576134237943, + "tokens_seen": 563085312 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.4872007668018341, + "objective/train/docs_used": 327873, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9615707397460938, + "objective/train/original_loss": 3.9615705013275146, + "objective/train/theoretical_loss": 3.8651965929861625, + "objective/train/tokens_used": 584069600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23878945410251617, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499337911605835, + "objective/train/weighted_lm_loss": 4.16065788269043, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.954364538192749, + "theoretical_loss": 3.8651965929861625, + "tokens_seen": 563609600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040332954975406737, + "loss": 3.9192, + "theoretical_loss": 3.8648175033844323, + "tokens_seen": 564133888 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004031403707907681, + "loss": 3.9902, + "theoretical_loss": 3.8640606753081954, + "tokens_seen": 565182464 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004029511918274688, + "loss": 4.0045, + "theoretical_loss": 3.8633056423924232, + "tokens_seen": 566231040 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.4880378842353821, + "objective/train/docs_used": 329813, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8092806339263916, + "objective/train/original_loss": 3.8092806339263916, + "objective/train/theoretical_loss": 3.8628346550591868, + "objective/train/tokens_used": 587346400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24268953502178192, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500373840332031, + "objective/train/weighted_lm_loss": 3.999559164047241, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9522239565849304, + "theoretical_loss": 3.8628346550591868, + "tokens_seen": 566886400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004027620128641695, + "loss": 3.9948, + "theoretical_loss": 3.8625523970665174, + "tokens_seen": 567279616 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040257283390087025, + "loss": 3.9634, + "theoretical_loss": 3.8618009318057234, + "tokens_seen": 568328192 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040238365493757095, + "loss": 3.984, + "theoretical_loss": 3.861051239130771, + "tokens_seen": 569376768 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.4942159652709961, + "objective/train/docs_used": 331022, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.885915756225586, + "objective/train/original_loss": 3.885915517807007, + "objective/train/theoretical_loss": 3.8604901284102264, + "objective/train/tokens_used": 590623200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24478492140769958, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506659746170044, + "objective/train/weighted_lm_loss": 4.08283805847168, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 1.01108717918396, + "theoretical_loss": 3.8604901284102264, + "tokens_seen": 570163200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004021944759742717, + "loss": 3.956, + "theoretical_loss": 3.860303311607516, + "tokens_seen": 570425344 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004020052970109724, + "loss": 3.9688, + "theoretical_loss": 3.859557141846584, + "tokens_seen": 571473920 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004018161180476731, + "loss": 3.9664, + "theoretical_loss": 3.858812722503022, + "tokens_seen": 572522496 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.48668184876441956, + "objective/train/docs_used": 332875, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9940710067749023, + "objective/train/original_loss": 3.994070529937744, + "objective/train/theoretical_loss": 3.8581627857261136, + "objective/train/tokens_used": 593900000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23973610997200012, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498868227005005, + "objective/train/weighted_lm_loss": 4.192169189453125, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9601668119430542, + "theoretical_loss": 3.8581627857261136, + "tokens_seen": 573440000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00040162693908437383, + "loss": 3.9147, + "theoretical_loss": 3.8580700462759463, + "tokens_seen": 573571072 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004014377601210745, + "loss": 3.8744, + "theoretical_loss": 3.857329105908203, + "tokens_seen": 574619648 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004012485811577752, + "loss": 3.8891, + "theoretical_loss": 3.8565898941860244, + "tokens_seen": 575668224 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.48827463388442993, + "objective/train/docs_used": 334850, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.9289157390594482, + "objective/train/original_loss": 3.928915500640869, + "objective/train/theoretical_loss": 3.855852403938689, + "objective/train/tokens_used": 597176800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24086636304855347, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500519275665283, + "objective/train/weighted_lm_loss": 4.12490701675415, + "objective/train/weights_max": 1.0512205362319946, + "objective/train/weights_min": 0.9660784602165222, + "theoretical_loss": 3.855852403938689, + "tokens_seen": 576716800 + }, + { + "epoch": 0.21, + "learning_rate": 0.000401059402194476, + "loss": 3.9544, + "theoretical_loss": 3.855852403938689, + "tokens_seen": 576716800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004008702232311767, + "loss": 3.8938, + "theoretical_loss": 3.8551166280381928, + "tokens_seen": 577765376 + }, + { + "epoch": 0.21, + "learning_rate": 0.00040068104426787746, + "loss": 3.9262, + "theoretical_loss": 3.854382559398911, + "tokens_seen": 578813952 + }, + { + "epoch": 0.21, + "learning_rate": 0.00040049186530457815, + "loss": 3.9064, + "theoretical_loss": 3.8536501909772745, + "tokens_seen": 579862528 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.4799477756023407, + "objective/train/docs_used": 336483, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.157769680023193, + "objective/train/original_loss": 4.157769203186035, + "objective/train/theoretical_loss": 3.8535587641219466, + "objective/train/tokens_used": 600453600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23627838492393494, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049195647239685, + "objective/train/weighted_lm_loss": 4.361729621887207, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9601474404335022, + "theoretical_loss": 3.8535587641219466, + "tokens_seen": 579993600 + }, + { + "epoch": 0.21, + "learning_rate": 0.00040030268634127884, + "loss": 3.9204, + "theoretical_loss": 3.852919515771444, + "tokens_seen": 580911104 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004001135073779796, + "loss": 3.8705, + "theoretical_loss": 3.8521905268209857, + "tokens_seen": 581959680 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003999243284146803, + "loss": 3.8418, + "theoretical_loss": 3.851463217206555, + "tokens_seen": 583008256 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.4861145317554474, + "objective/train/docs_used": 338617, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5503835678100586, + "objective/train/original_loss": 3.5503835678100586, + "objective/train/theoretical_loss": 3.8512816513922274, + "objective/train/tokens_used": 603730400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23959481716156006, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498292446136475, + "objective/train/weighted_lm_loss": 3.7280282974243164, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9553146362304688, + "theoretical_loss": 3.8512816513922274, + "tokens_seen": 583270400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039973514945138103, + "loss": 3.9243, + "theoretical_loss": 3.85073758004958, + "tokens_seen": 584056832 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003995459704880817, + "loss": 3.8881, + "theoretical_loss": 3.850013608511947, + "tokens_seen": 585105408 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003993567915247824, + "loss": 3.8762, + "theoretical_loss": 3.8492912957956933, + "tokens_seen": 586153984 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.49152064323425293, + "objective/train/docs_used": 340897, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.6887147426605225, + "objective/train/original_loss": 3.6887147426605225, + "objective/train/theoretical_loss": 3.849020854811377, + "objective/train/tokens_used": 607007200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24309617280960083, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503878593444824, + "objective/train/weighted_lm_loss": 3.874969959259033, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.955743134021759, + "theoretical_loss": 3.849020854811377, + "tokens_seen": 586547200 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039916761256148317, + "loss": 3.8016, + "theoretical_loss": 3.848570635142696, + "tokens_seen": 587202560 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039897843359818386, + "loss": 3.868, + "theoretical_loss": 3.8478516198343717, + "tokens_seen": 588251136 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003987892546348846, + "loss": 3.8427, + "theoretical_loss": 3.847134243191375, + "tokens_seen": 589299712 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.4828517436981201, + "objective/train/docs_used": 342928, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4539072513580322, + "objective/train/original_loss": 3.4539074897766113, + "objective/train/theoretical_loss": 3.8467761672927336, + "objective/train/tokens_used": 610284000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23826996982097626, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494961738586426, + "objective/train/weighted_lm_loss": 3.62439227104187, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9543166756629944, + "theoretical_loss": 3.8467761672927336, + "tokens_seen": 589824000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039860007567158536, + "loss": 3.8673, + "theoretical_loss": 3.8464184985732968, + "tokens_seen": 590348288 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039841089670828605, + "loss": 3.8051, + "theoretical_loss": 3.845704379378372, + "tokens_seen": 591396864 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003982217177449868, + "loss": 3.8316, + "theoretical_loss": 3.8449918790431843, + "tokens_seen": 592445440 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.4931422472000122, + "objective/train/docs_used": 344591, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7263171672821045, + "objective/train/original_loss": 3.7263169288635254, + "objective/train/theoretical_loss": 3.844547385509876, + "objective/train/tokens_used": 613560800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24458102881908417, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505576133728027, + "objective/train/weighted_lm_loss": 3.9147326946258545, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.958858847618103, + "theoretical_loss": 3.844547385509876, + "tokens_seen": 593100800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003980325387816875, + "loss": 3.7537, + "theoretical_loss": 3.8442809910423783, + "tokens_seen": 593494016 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003978433598183882, + "loss": 3.8097, + "theoretical_loss": 3.8435717088883696, + "tokens_seen": 594542592 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039765418085508893, + "loss": 3.8132, + "theoretical_loss": 3.842864026131061, + "tokens_seen": 595591168 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.4828213155269623, + "objective/train/docs_used": 346610, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7849457263946533, + "objective/train/original_loss": 3.784945487976074, + "objective/train/theoretical_loss": 3.8423343098080185, + "objective/train/tokens_used": 616837600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23697614669799805, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494866371154785, + "objective/train/weighted_lm_loss": 3.9723143577575684, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9641447067260742, + "theoretical_loss": 3.8423343098080185, + "tokens_seen": 596377600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003974650018917896, + "loss": 3.8631, + "theoretical_loss": 3.8421579363575615, + "tokens_seen": 596639744 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003972758229284904, + "loss": 3.8146, + "theoretical_loss": 3.841453433191904, + "tokens_seen": 597688320 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039708664396519107, + "loss": 3.6873, + "theoretical_loss": 3.8407505102947725, + "tokens_seen": 598736896 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.4851977527141571, + "objective/train/docs_used": 348552, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.527412176132202, + "objective/train/original_loss": 3.527411937713623, + "objective/train/theoretical_loss": 3.8401367441179683, + "objective/train/tokens_used": 620114400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23921814560890198, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04973566532135, + "objective/train/weighted_lm_loss": 3.7025134563446045, + "objective/train/weights_max": 1.0512197017669678, + "objective/train/weights_min": 0.9565510153770447, + "theoretical_loss": 3.8401367441179683, + "tokens_seen": 599654400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039689746500189176, + "loss": 3.8363, + "theoretical_loss": 3.840049161363223, + "tokens_seen": 599785472 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003967082860385925, + "loss": 3.8554, + "theoretical_loss": 3.839349380130415, + "tokens_seen": 600834048 + }, + { + "epoch": 0.21, + "learning_rate": 0.00039651910707529326, + "loss": 3.8245, + "theoretical_loss": 3.838651160365341, + "tokens_seen": 601882624 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.4847736060619354, + "objective/train/docs_used": 349996, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.6133832931518555, + "objective/train/original_loss": 3.6133837699890137, + "objective/train/theoretical_loss": 3.837954495872559, + "objective/train/tokens_used": 623391200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24165962636470795, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497056245803833, + "objective/train/weighted_lm_loss": 3.7934508323669434, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9520853757858276, + "theoretical_loss": 3.837954495872559, + "tokens_seen": 602931200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039632992811199395, + "loss": 3.8619, + "theoretical_loss": 3.837954495872559, + "tokens_seen": 602931200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003961407491486947, + "loss": 3.7791, + "theoretical_loss": 3.837259380491929, + "tokens_seen": 603979776 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003959515701853954, + "loss": 3.8418, + "theoretical_loss": 3.836565808098351, + "tokens_seen": 605028352 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039576239122209614, + "loss": 3.7307, + "theoretical_loss": 3.835873772601505, + "tokens_seen": 606076928 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.4784153997898102, + "objective/train/docs_used": 351917, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.869309663772583, + "objective/train/original_loss": 3.869309902191162, + "objective/train/theoretical_loss": 3.8357873759254693, + "objective/train/tokens_used": 626668000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23981763422489166, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490604639053345, + "objective/train/weighted_lm_loss": 4.058266639709473, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9513825178146362, + "theoretical_loss": 3.8357873759254693, + "tokens_seen": 606208000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039557321225879683, + "loss": 3.8473, + "theoretical_loss": 3.8351832679455935, + "tokens_seen": 607125504 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003953840332954975, + "loss": 3.8616, + "theoretical_loss": 3.834494288109086, + "tokens_seen": 608174080 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039519485433219827, + "loss": 3.7887, + "theoretical_loss": 3.8338068271044703, + "tokens_seen": 609222656 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.4771646559238434, + "objective/train/docs_used": 353838, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.676715850830078, + "objective/train/original_loss": 3.6767160892486572, + "objective/train/theoretical_loss": 3.833635198472356, + "objective/train/tokens_used": 629944800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24118247628211975, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489422082901, + "objective/train/weighted_lm_loss": 3.8588476181030273, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9538320899009705, + "theoretical_loss": 3.833635198472356, + "tokens_seen": 609484800 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039500567536889897, + "loss": 3.734, + "theoretical_loss": 3.8331208789779954, + "tokens_seen": 610271232 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003948164964055997, + "loss": 3.7623, + "theoretical_loss": 3.83243643780943, + "tokens_seen": 611319808 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003946273174423004, + "loss": 3.7609, + "theoretical_loss": 3.8317534977118117, + "tokens_seen": 612368384 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.48455891013145447, + "objective/train/docs_used": 355739, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.41206693649292, + "objective/train/original_loss": 3.4120664596557617, + "objective/train/theoretical_loss": 3.831497780974214, + "objective/train/tokens_used": 633221600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24124765396118164, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496820211410522, + "objective/train/weighted_lm_loss": 3.581446647644043, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9515277147293091, + "theoretical_loss": 3.831497780974214, + "tokens_seen": 612761600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003944381384790011, + "loss": 3.712, + "theoretical_loss": 3.8310720528312077, + "tokens_seen": 613416960 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003942489595157019, + "loss": 3.8369, + "theoretical_loss": 3.830392097346471, + "tokens_seen": 614465536 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003940597805524026, + "loss": 3.783, + "theoretical_loss": 3.8297136254690005, + "tokens_seen": 615514112 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.4844302237033844, + "objective/train/docs_used": 357525, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7821826934814453, + "objective/train/original_loss": 3.782182216644287, + "objective/train/theoretical_loss": 3.829374944082894, + "objective/train/tokens_used": 636498400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2419605404138565, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496729612350464, + "objective/train/weighted_lm_loss": 3.9696102142333984, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9518074989318848, + "theoretical_loss": 3.829374944082894, + "tokens_seen": 616038400 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039387060158910334, + "loss": 3.7717, + "theoretical_loss": 3.829036631442506, + "tokens_seen": 616562688 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039368142262580404, + "loss": 3.8207, + "theoretical_loss": 3.8283611095427723, + "tokens_seen": 617611264 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039349224366250473, + "loss": 3.8517, + "theoretical_loss": 3.827687054077426, + "tokens_seen": 618659840 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.48121803998947144, + "objective/train/docs_used": 359292, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7164201736450195, + "objective/train/original_loss": 3.7164201736450195, + "objective/train/theoretical_loss": 3.8272665115687077, + "objective/train/tokens_used": 639775200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23925188183784485, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493377447128296, + "objective/train/weighted_lm_loss": 3.8987090587615967, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9516848921775818, + "theoretical_loss": 3.8272665115687077, + "tokens_seen": 619315200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003933030646992055, + "loss": 3.7766, + "theoretical_loss": 3.8270144593857056, + "tokens_seen": 619708416 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039311388573590617, + "loss": 3.8282, + "theoretical_loss": 3.8263433198382324, + "tokens_seen": 620756992 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039292470677260686, + "loss": 3.8165, + "theoretical_loss": 3.825673629836783, + "tokens_seen": 621805568 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.4897088408470154, + "objective/train/docs_used": 360813, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4187488555908203, + "objective/train/original_loss": 3.4187488555908203, + "objective/train/theoretical_loss": 3.8251723102500437, + "objective/train/tokens_used": 643052000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24293170869350433, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502057075500488, + "objective/train/weighted_lm_loss": 3.590527057647705, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9520784020423889, + "theoretical_loss": 3.8251723102500437, + "tokens_seen": 622592000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003927355278093076, + "loss": 3.8059, + "theoretical_loss": 3.8250053838140663, + "tokens_seen": 622854144 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003925463488460083, + "loss": 3.8233, + "theoretical_loss": 3.8243385762335, + "tokens_seen": 623902720 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039235716988270905, + "loss": 3.7707, + "theoretical_loss": 3.8236732015889903, + "tokens_seen": 624951296 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.4751550257205963, + "objective/train/docs_used": 362724, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.419285535812378, + "objective/train/original_loss": 3.419285774230957, + "objective/train/theoretical_loss": 3.823092169924938, + "objective/train/tokens_used": 646328800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2330601066350937, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048699975013733, + "objective/train/weighted_lm_loss": 3.5884759426116943, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9552221894264221, + "theoretical_loss": 3.823092169924938, + "tokens_seen": 625868800 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039216799091940975, + "loss": 3.8137, + "theoretical_loss": 3.8230092544047123, + "tokens_seen": 625999872 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003919788119561105, + "loss": 3.8197, + "theoretical_loss": 3.8223467292348943, + "tokens_seen": 627048448 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039178963299281124, + "loss": 3.8388, + "theoretical_loss": 3.8216856206636014, + "tokens_seen": 628097024 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.47925102710723877, + "objective/train/docs_used": 364484, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.616079568862915, + "objective/train/original_loss": 3.6160800457000732, + "objective/train/theoretical_loss": 3.8210259233045254, + "objective/train/tokens_used": 649605600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23632030189037323, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049126148223877, + "objective/train/weighted_lm_loss": 3.7934699058532715, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.956553041934967, + "theoretical_loss": 3.8210259233045254, + "tokens_seen": 629145600 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039160045402951194, + "loss": 3.8423, + "theoretical_loss": 3.8210259233045254, + "tokens_seen": 629145600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003914112750662127, + "loss": 3.7631, + "theoretical_loss": 3.8203676318007704, + "tokens_seen": 630194176 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003912220961029134, + "loss": 3.7999, + "theoretical_loss": 3.819710740824646, + "tokens_seen": 631242752 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039103291713961407, + "loss": 3.7643, + "theoretical_loss": 3.8190552450774584, + "tokens_seen": 632291328 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.49162372946739197, + "objective/train/docs_used": 365542, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.556671619415283, + "objective/train/original_loss": 3.556671142578125, + "objective/train/theoretical_loss": 3.8189734059483165, + "objective/train/tokens_used": 652882400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2439402937889099, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504025220870972, + "objective/train/weighted_lm_loss": 3.735517740249634, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9563419818878174, + "theoretical_loss": 3.8189734059483165, + "tokens_seen": 632422400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003908437381763148, + "loss": 3.7508, + "theoretical_loss": 3.818401139289306, + "tokens_seen": 633339904 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003906545592130155, + "loss": 3.7896, + "theoretical_loss": 3.8177484182188737, + "tokens_seen": 634388480 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003904653802497162, + "loss": 3.7664, + "theoretical_loss": 3.8170970766532326, + "tokens_seen": 635437056 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.4842968285083771, + "objective/train/docs_used": 367606, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4450738430023193, + "objective/train/original_loss": 3.4450740814208984, + "objective/train/theoretical_loss": 3.816934456201243, + "objective/train/tokens_used": 656159200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23944607377052307, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496467351913452, + "objective/train/weighted_lm_loss": 3.6150896549224854, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9515578150749207, + "theoretical_loss": 3.816934456201243, + "tokens_seen": 635699200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039027620128641695, + "loss": 3.7711, + "theoretical_loss": 3.816447109407641, + "tokens_seen": 636485632 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039008702232311765, + "loss": 3.7285, + "theoretical_loss": 3.815798511325341, + "tokens_seen": 637534208 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003898978433598184, + "loss": 3.7098, + "theoretical_loss": 3.8151512772773675, + "tokens_seen": 638582784 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.4804910719394684, + "objective/train/docs_used": 369550, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8657212257385254, + "objective/train/original_loss": 3.8657212257385254, + "objective/train/theoretical_loss": 3.8149089151324036, + "objective/train/tokens_used": 659436000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2408989667892456, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492734909057617, + "objective/train/weighted_lm_loss": 4.05525541305542, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9521989822387695, + "theoretical_loss": 3.8149089151324036, + "tokens_seen": 638976000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00038970866439651914, + "loss": 3.7187, + "theoretical_loss": 3.814505402162349, + "tokens_seen": 639631360 + }, + { + "epoch": 0.23, + "learning_rate": 0.00038951948543321983, + "loss": 3.7472, + "theoretical_loss": 3.813860880906316, + "tokens_seen": 640679936 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003893303064699206, + "loss": 3.6962, + "theoretical_loss": 3.813217708462508, + "tokens_seen": 641728512 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.4754175543785095, + "objective/train/docs_used": 371294, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.896268606185913, + "objective/train/original_loss": 3.896268606185913, + "objective/train/theoretical_loss": 3.81289662647547, + "objective/train/tokens_used": 662712800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23610548675060272, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487416982650757, + "objective/train/weighted_lm_loss": 4.083958625793457, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9514708518981934, + "theoretical_loss": 3.81289662647547, + "tokens_seen": 642252800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003891411275066213, + "loss": 3.7512, + "theoretical_loss": 3.8125758798111864, + "tokens_seen": 642777088 + }, + { + "epoch": 0.23, + "learning_rate": 0.000388951948543322, + "loss": 3.7466, + "theoretical_loss": 3.8119353899594413, + "tokens_seen": 643825664 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003887627695800227, + "loss": 3.7158, + "theoretical_loss": 3.8112962339410092, + "tokens_seen": 644874240 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.48057374358177185, + "objective/train/docs_used": 373458, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5815200805664062, + "objective/train/original_loss": 3.5815200805664062, + "objective/train/theoretical_loss": 3.8108974365706887, + "objective/train/tokens_used": 665989600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23741313815116882, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492641925811768, + "objective/train/weighted_lm_loss": 3.756553888320923, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.96083664894104, + "theoretical_loss": 3.8108974365706887, + "tokens_seen": 645529600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003885735906167234, + "loss": 3.761, + "theoretical_loss": 3.810658406816085, + "tokens_seen": 645922816 + }, + { + "epoch": 0.23, + "learning_rate": 0.00038838441165342416, + "loss": 3.7984, + "theoretical_loss": 3.8100219036711396, + "tokens_seen": 646971392 + }, + { + "epoch": 0.23, + "learning_rate": 0.00038819523269012485, + "loss": 3.6906, + "theoretical_loss": 3.809386719618737, + "tokens_seen": 648019968 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.48978403210639954, + "objective/train/docs_used": 375609, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.641045331954956, + "objective/train/original_loss": 3.641045331954956, + "objective/train/theoretical_loss": 3.808911194308436, + "objective/train/tokens_used": 669266400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24249985814094543, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502111911773682, + "objective/train/weighted_lm_loss": 3.823545217514038, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9532389640808105, + "theoretical_loss": 3.808911194308436, + "tokens_seen": 648806400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00038800605372682554, + "loss": 3.6938, + "theoretical_loss": 3.808752849797353, + "tokens_seen": 649068544 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003878168747635263, + "loss": 3.7689, + "theoretical_loss": 3.8081202893712005, + "tokens_seen": 650117120 + }, + { + "epoch": 0.23, + "learning_rate": 0.000387627695800227, + "loss": 3.7645, + "theoretical_loss": 3.807489033530046, + "tokens_seen": 651165696 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.4892757833003998, + "objective/train/docs_used": 377261, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.345855712890625, + "objective/train/original_loss": 3.345855712890625, + "objective/train/theoretical_loss": 3.806937751074268, + "objective/train/tokens_used": 672543200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24105043709278107, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501528978347778, + "objective/train/weighted_lm_loss": 3.5135161876678467, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 1.0038224458694458, + "theoretical_loss": 3.806937751074268, + "tokens_seen": 652083200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003874385168369278, + "loss": 3.7125, + "theoretical_loss": 3.806859077489038, + "tokens_seen": 652214272 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003872493378736285, + "loss": 3.6562, + "theoretical_loss": 3.806230416488531, + "tokens_seen": 653262848 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003870601589103292, + "loss": 3.7719, + "theoretical_loss": 3.8056030457939114, + "tokens_seen": 654311424 + }, + { + "debugging/Self-BLEU-5": 0.5265375629586004, + "debugging/distinct-1-grams": 0.7435820408094715, + "debugging/distinct-2-grams": 0.9558103821233092, + "debugging/entropy-1-grams": 5.931434510687563, + "debugging/entropy-2-grams": 6.886416755326388, + "debugging/length": 521.9230769230769, + "debugging/num_segments": 13, + "debugging/raw_token_scores_avg": 0.022742915898561478, + "debugging/raw_token_scores_std": 0.07841178774833679, + "epoch": 0.23, + "objective/train/advantage_avg": 0.4772515594959259, + "objective/train/docs_used": 379091, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.803166627883911, + "objective/train/original_loss": 3.803165912628174, + "objective/train/theoretical_loss": 3.804976960695429, + "objective/train/tokens_used": 675820000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23392102122306824, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489141941070557, + "objective/train/weighted_lm_loss": 3.988487958908081, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9569684863090515, + "theoretical_loss": 3.804976960695429, + "tokens_seen": 655360000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003868709799470299, + "loss": 3.7882, + "theoretical_loss": 3.804976960695429, + "tokens_seen": 655360000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003866818009837306, + "loss": 3.7545, + "theoretical_loss": 3.8043521565080236, + "tokens_seen": 656408576 + }, + { + "epoch": 0.23, + "learning_rate": 0.00038649262202043136, + "loss": 3.8025, + "theoretical_loss": 3.803728628571159, + "tokens_seen": 657457152 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038630344305713206, + "loss": 3.6804, + "theoretical_loss": 3.803106372248654, + "tokens_seen": 658505728 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.4905237853527069, + "objective/train/docs_used": 380902, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.74001145362854, + "objective/train/original_loss": 3.740011692047119, + "objective/train/theoretical_loss": 3.8030286793887647, + "objective/train/tokens_used": 679096800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24173638224601746, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502811670303345, + "objective/train/weighted_lm_loss": 3.927738666534424, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.984406590461731, + "theoretical_loss": 3.8030286793887647, + "tokens_seen": 658636800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038611426409383275, + "loss": 3.7401, + "theoretical_loss": 3.8024853829285172, + "tokens_seen": 659554304 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003859250851305335, + "loss": 3.755, + "theoretical_loss": 3.801865656022783, + "tokens_seen": 660602880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003857359061672342, + "loss": 3.731, + "theoretical_loss": 3.801247186967348, + "tokens_seen": 661651456 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.4816935658454895, + "objective/train/docs_used": 382599, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.499560832977295, + "objective/train/original_loss": 3.499560832977295, + "objective/train/theoretical_loss": 3.8010927657100013, + "objective/train/tokens_used": 682373600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24318966269493103, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494053363800049, + "objective/train/weighted_lm_loss": 3.671086072921753, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9515833854675293, + "theoretical_loss": 3.8010927657100013, + "tokens_seen": 661913600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003855467272039349, + "loss": 3.7489, + "theoretical_loss": 3.8006299712218086, + "tokens_seen": 662700032 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038535754824063563, + "loss": 3.8024, + "theoretical_loss": 3.8000140042693022, + "tokens_seen": 663748608 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003851683692773364, + "loss": 3.7713, + "theoretical_loss": 3.799399281616348, + "tokens_seen": 664797184 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.4905191957950592, + "objective/train/docs_used": 384593, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.0451765060424805, + "objective/train/original_loss": 4.0451765060424805, + "objective/train/theoretical_loss": 3.7991690805043445, + "objective/train/tokens_used": 685650400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24210280179977417, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050282597541809, + "objective/train/weighted_lm_loss": 4.24802303314209, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9715428352355957, + "theoretical_loss": 3.7991690805043445, + "tokens_seen": 665190400 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038497919031403713, + "loss": 3.7883, + "theoretical_loss": 3.798785798792688, + "tokens_seen": 665845760 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003847900113507378, + "loss": 3.7983, + "theoretical_loss": 3.798173551351132, + "tokens_seen": 666894336 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003846008323874385, + "loss": 3.7106, + "theoretical_loss": 3.797562534867401, + "tokens_seen": 667942912 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.47869381308555603, + "objective/train/docs_used": 386337, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.6440792083740234, + "objective/train/original_loss": 3.6440794467926025, + "objective/train/theoretical_loss": 3.797257486858361, + "objective/train/tokens_used": 688927200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2353726178407669, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490657091140747, + "objective/train/weighted_lm_loss": 3.8236608505249023, + "objective/train/weights_max": 1.0512152910232544, + "objective/train/weights_min": 0.9515171647071838, + "theoretical_loss": 3.797257486858361, + "tokens_seen": 668467200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038441165342413926, + "loss": 3.7644, + "theoretical_loss": 3.796952744939976, + "tokens_seen": 668991488 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038422247446083996, + "loss": 3.7476, + "theoretical_loss": 3.7963441771899418, + "tokens_seen": 670040064 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003840332954975407, + "loss": 3.7805, + "theoretical_loss": 3.795736827260839, + "tokens_seen": 671088640 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.4898402690887451, + "objective/train/docs_used": 388157, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.500314474105835, + "objective/train/original_loss": 3.500314235687256, + "objective/train/theoretical_loss": 3.795357850053097, + "objective/train/tokens_used": 692204000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24193520843982697, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502138137817383, + "objective/train/weighted_lm_loss": 3.6766164302825928, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9680190682411194, + "theoretical_loss": 3.795357850053097, + "tokens_seen": 671744000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003838441165342414, + "loss": 3.7275, + "theoretical_loss": 3.795130690818514, + "tokens_seen": 672137216 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003836549375709421, + "loss": 3.6728, + "theoretical_loss": 3.7945257635509657, + "tokens_seen": 673185792 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038346575860764284, + "loss": 3.6878, + "theoretical_loss": 3.793922041168204, + "tokens_seen": 674234368 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.48224732279777527, + "objective/train/docs_used": 390150, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4588170051574707, + "objective/train/original_loss": 3.4588167667388916, + "objective/train/theoretical_loss": 3.79347003751841, + "objective/train/tokens_used": 695480800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23753906786441803, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494320392608643, + "objective/train/weighted_lm_loss": 3.6302719116210938, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9518483877182007, + "theoretical_loss": 3.79347003751841, + "tokens_seen": 675020800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038327657964434353, + "loss": 3.6997, + "theoretical_loss": 3.7933195194020994, + "tokens_seen": 675282944 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003830874006810442, + "loss": 3.7589, + "theoretical_loss": 3.7927181940062407, + "tokens_seen": 676331520 + }, + { + "epoch": 0.24, + "learning_rate": 0.000382898221717745, + "loss": 3.6792, + "theoretical_loss": 3.792118060755787, + "tokens_seen": 677380096 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.4679737389087677, + "objective/train/docs_used": 392039, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2993521690368652, + "objective/train/original_loss": 3.299351930618286, + "objective/train/theoretical_loss": 3.7915939187884558, + "objective/train/tokens_used": 698757600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23201338946819305, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0479761362075806, + "objective/train/weighted_lm_loss": 3.4587182998657227, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9521719217300415, + "theoretical_loss": 3.7915939187884558, + "tokens_seen": 678297600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003827090427544457, + "loss": 3.7225, + "theoretical_loss": 3.7915191154473287, + "tokens_seen": 678428672 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038251986379114647, + "loss": 3.668, + "theoretical_loss": 3.790921353898745, + "tokens_seen": 679477248 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038233068482784716, + "loss": 3.6388, + "theoretical_loss": 3.790324771949063, + "tokens_seen": 680525824 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.4891808032989502, + "objective/train/docs_used": 393961, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5404582023620605, + "objective/train/original_loss": 3.5404579639434814, + "objective/train/theoretical_loss": 3.7897293654583164, + "objective/train/tokens_used": 702034400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24321125447750092, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501543283462524, + "objective/train/weighted_lm_loss": 3.718618869781494, + "objective/train/weights_max": 1.0512158870697021, + "objective/train/weights_min": 0.9517624974250793, + "theoretical_loss": 3.7897293654583164, + "tokens_seen": 681574400 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038214150586454785, + "loss": 3.6814, + "theoretical_loss": 3.7897293654583164, + "tokens_seen": 681574400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003819523269012486, + "loss": 3.5899, + "theoretical_loss": 3.7891351303074123, + "tokens_seen": 682622976 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003817631479379493, + "loss": 3.5557, + "theoretical_loss": 3.7885420623979886, + "tokens_seen": 683671552 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038157396897465004, + "loss": 3.6581, + "theoretical_loss": 3.787950157652282, + "tokens_seen": 684720128 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.47339844703674316, + "objective/train/docs_used": 395905, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.22224497795105, + "objective/train/original_loss": 3.222245216369629, + "objective/train/theoretical_loss": 3.7878762511417223, + "objective/train/tokens_used": 705311200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23162895441055298, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048516869544983, + "objective/train/weighted_lm_loss": 3.378674268722534, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9526793956756592, + "theoretical_loss": 3.7878762511417223, + "tokens_seen": 684851200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00038138479001135074, + "loss": 3.6211, + "theoretical_loss": 3.78735941201299, + "tokens_seen": 685768704 + }, + { + "epoch": 0.25, + "learning_rate": 0.00038119561104805143, + "loss": 3.6447, + "theoretical_loss": 3.786769821443141, + "tokens_seen": 686817280 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003810064320847522, + "loss": 3.6458, + "theoretical_loss": 3.7861813819259575, + "tokens_seen": 687865856 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.46620890498161316, + "objective/train/docs_used": 397858, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2505011558532715, + "objective/train/original_loss": 3.2505016326904297, + "objective/train/theoretical_loss": 3.7860344514298374, + "objective/train/tokens_used": 708588000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23270408809185028, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0478036403656006, + "objective/train/weighted_lm_loss": 3.4039294719696045, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9517877697944641, + "theoretical_loss": 3.7860344514298374, + "tokens_seen": 688128000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00038081725312145287, + "loss": 3.6586, + "theoretical_loss": 3.7855940894647278, + "tokens_seen": 688914432 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003806280741581537, + "loss": 3.6271, + "theoretical_loss": 3.785007940082673, + "tokens_seen": 689963008 + }, + { + "epoch": 0.25, + "learning_rate": 0.00038043889519485437, + "loss": 3.6519, + "theoretical_loss": 3.7844229298228176, + "tokens_seen": 691011584 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.4676517844200134, + "objective/train/docs_used": 398758, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.232473850250244, + "objective/train/original_loss": 3.2324740886688232, + "objective/train/theoretical_loss": 3.7842038438510803, + "objective/train/tokens_used": 711864800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22986283898353577, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0479334592819214, + "objective/train/weighted_lm_loss": 3.3902587890625, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9528036713600159, + "theoretical_loss": 3.7842038438510803, + "tokens_seen": 691404800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00038024971623155506, + "loss": 3.76, + "theoretical_loss": 3.7838390547478635, + "tokens_seen": 692060160 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003800605372682558, + "loss": 3.6894, + "theoretical_loss": 3.78325631094006, + "tokens_seen": 693108736 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003798713583049565, + "loss": 3.6554, + "theoretical_loss": 3.782674694501079, + "tokens_seen": 694157312 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.4899442195892334, + "objective/train/docs_used": 400726, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5794906616210938, + "objective/train/original_loss": 3.5794901847839355, + "objective/train/theoretical_loss": 3.782384307831949, + "objective/train/tokens_used": 715141600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2438521683216095, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050234079360962, + "objective/train/weighted_lm_loss": 3.758875846862793, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9520761966705322, + "theoretical_loss": 3.782384307831949, + "tokens_seen": 694681600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003796821793416572, + "loss": 3.7036, + "theoretical_loss": 3.782094201551887, + "tokens_seen": 695205888 + }, + { + "epoch": 0.25, + "learning_rate": 0.00037949300037835794, + "loss": 3.704, + "theoretical_loss": 3.7815148282326243, + "tokens_seen": 696254464 + }, + { + "epoch": 0.25, + "learning_rate": 0.00037930382141505864, + "loss": 3.6473, + "theoretical_loss": 3.780936570702478, + "tokens_seen": 697303040 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.48783382773399353, + "objective/train/docs_used": 402482, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4494855403900146, + "objective/train/original_loss": 3.4494853019714355, + "objective/train/theoretical_loss": 3.780575724658811, + "objective/train/tokens_used": 718418400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24229206144809723, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500150918960571, + "objective/train/weighted_lm_loss": 3.621917963027954, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9659635424613953, + "theoretical_loss": 3.780575724658811, + "tokens_seen": 697958400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003791146424517594, + "loss": 3.6276, + "theoretical_loss": 3.780359425139562, + "tokens_seen": 698351616 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003789254634884601, + "loss": 3.6618, + "theoretical_loss": 3.7797833877407947, + "tokens_seen": 699400192 + }, + { + "epoch": 0.25, + "learning_rate": 0.00037873628452516077, + "loss": 3.5677, + "theoretical_loss": 3.779208454721779, + "tokens_seen": 700448768 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.4900723993778229, + "objective/train/docs_used": 404817, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.499330759048462, + "objective/train/original_loss": 3.499330997467041, + "objective/train/theoretical_loss": 3.778777977440649, + "objective/train/tokens_used": 721695200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24242709577083588, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502395629882812, + "objective/train/weighted_lm_loss": 3.675553321838379, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9576201438903809, + "theoretical_loss": 3.778777977440649, + "tokens_seen": 701235200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003785471055618615, + "loss": 3.6181, + "theoretical_loss": 3.7786346223166802, + "tokens_seen": 701497344 + }, + { + "epoch": 0.25, + "learning_rate": 0.00037835792659856227, + "loss": 3.5818, + "theoretical_loss": 3.778061886778111, + "tokens_seen": 702545920 + }, + { + "epoch": 0.25, + "learning_rate": 0.000378168747635263, + "loss": 3.6, + "theoretical_loss": 3.7774902443770113, + "tokens_seen": 703594496 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.4579163193702698, + "objective/train/docs_used": 406758, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1486806869506836, + "objective/train/original_loss": 3.1486809253692627, + "objective/train/theoretical_loss": 3.7769909510727144, + "objective/train/tokens_used": 724972000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23327672481536865, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.046976923942566, + "objective/train/weighted_lm_loss": 3.3008174896240234, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9533183574676514, + "theoretical_loss": 3.7769909510727144, + "tokens_seen": 704512000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003779795686719637, + "loss": 3.6163, + "theoretical_loss": 3.776919691402532, + "tokens_seen": 704643072 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003777903897086644, + "loss": 3.6061, + "theoretical_loss": 3.7763502241619205, + "tokens_seen": 705691648 + }, + { + "epoch": 0.25, + "learning_rate": 0.00037760121074536515, + "loss": 3.6111, + "theoretical_loss": 3.7757818389804023, + "tokens_seen": 706740224 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.48214995861053467, + "objective/train/docs_used": 408602, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.046311140060425, + "objective/train/original_loss": 3.046311140060425, + "objective/train/theoretical_loss": 3.775214532201071, + "objective/train/tokens_used": 728248800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23837196826934814, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049426555633545, + "objective/train/weighted_lm_loss": 3.1969616413116455, + "objective/train/weights_max": 1.0512155294418335, + "objective/train/weights_min": 0.953264057636261, + "theoretical_loss": 3.775214532201071, + "tokens_seen": 707788800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00037741203178206584, + "loss": 3.4984, + "theoretical_loss": 3.775214532201071, + "tokens_seen": 707788800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00037722285281876653, + "loss": 3.5725, + "theoretical_loss": 3.774648300184772, + "tokens_seen": 708837376 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003770336738554673, + "loss": 3.5147, + "theoretical_loss": 3.774083139309993, + "tokens_seen": 709885952 + }, + { + "epoch": 0.25, + "learning_rate": 0.000376844494892168, + "loss": 3.5496, + "theoretical_loss": 3.7735190459727486, + "tokens_seen": 710934528 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.489047646522522, + "objective/train/docs_used": 410204, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3076088428497314, + "objective/train/original_loss": 3.3076090812683105, + "objective/train/theoretical_loss": 3.7734486091880095, + "objective/train/tokens_used": 731525600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24238334596157074, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050136923789978, + "objective/train/weighted_lm_loss": 3.473520517349243, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9621482491493225, + "theoretical_loss": 3.7734486091880095, + "tokens_seen": 711065600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003766553159288687, + "loss": 3.5332, + "theoretical_loss": 3.7729560165864746, + "tokens_seen": 711983104 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003764661369655694, + "loss": 3.6379, + "theoretical_loss": 3.7723940475819147, + "tokens_seen": 713031680 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003762769580022701, + "loss": 3.569, + "theoretical_loss": 3.7718331354070127, + "tokens_seen": 714080256 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.477344810962677, + "objective/train/docs_used": 412084, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.31864595413208, + "objective/train/original_loss": 3.31864595413208, + "objective/train/theoretical_loss": 3.7716930720782935, + "objective/train/tokens_used": 734802400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24045908451080322, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489565134048462, + "objective/train/weighted_lm_loss": 3.480764627456665, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.952606737613678, + "theoretical_loss": 3.7716930720782935, + "tokens_seen": 714342400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003760877790389709, + "loss": 3.6623, + "theoretical_loss": 3.771273276526805, + "tokens_seen": 715128832 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003758986000756716, + "loss": 3.6001, + "theoretical_loss": 3.770714467423313, + "tokens_seen": 716177408 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037570942111237235, + "loss": 3.6465, + "theoretical_loss": 3.7701567045954367, + "tokens_seen": 717225984 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.47307994961738586, + "objective/train/docs_used": 413912, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0646326541900635, + "objective/train/original_loss": 3.0646326541900635, + "objective/train/theoretical_loss": 3.769947812566226, + "objective/train/tokens_used": 738079200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23451007902622223, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484997034072876, + "objective/train/weighted_lm_loss": 3.2135608196258545, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9514219760894775, + "theoretical_loss": 3.769947812566226, + "tokens_seen": 717619200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037552024214907305, + "loss": 3.6583, + "theoretical_loss": 3.76959998455885, + "tokens_seen": 718274560 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037533106318577374, + "loss": 3.6294, + "theoretical_loss": 3.7690443038458943, + "tokens_seen": 719323136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003751418842224745, + "loss": 3.5343, + "theoretical_loss": 3.7684896590054757, + "tokens_seen": 720371712 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.48498988151550293, + "objective/train/docs_used": 415834, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3692052364349365, + "objective/train/original_loss": 3.3692054748535156, + "objective/train/theoretical_loss": 3.7682127239635053, + "objective/train/tokens_used": 741356000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24069416522979736, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497223138809204, + "objective/train/weighted_lm_loss": 3.5365824699401855, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9525206089019775, + "theoretical_loss": 3.7682127239635053, + "tokens_seen": 720896000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003749527052591752, + "loss": 3.646, + "theoretical_loss": 3.767936046602963, + "tokens_seen": 721420288 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003747635262958759, + "loss": 3.6593, + "theoretical_loss": 3.7673834632200824, + "tokens_seen": 722468864 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003745743473325766, + "loss": 3.675, + "theoretical_loss": 3.76683190545482, + "tokens_seen": 723517440 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.4824288487434387, + "objective/train/docs_used": 417692, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7827279567718506, + "objective/train/original_loss": 3.7827279567718506, + "objective/train/theoretical_loss": 3.7664877011678484, + "objective/train/tokens_used": 744632800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2396303415298462, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049461007118225, + "objective/train/weighted_lm_loss": 3.9698286056518555, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9542454481124878, + "theoretical_loss": 3.7664877011678484, + "tokens_seen": 724172800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003743851683692773, + "loss": 3.6335, + "theoretical_loss": 3.766281369921316, + "tokens_seen": 724566016 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037419598940597806, + "loss": 3.6526, + "theoretical_loss": 3.765731853249771, + "tokens_seen": 725614592 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037400681044267876, + "loss": 3.524, + "theoretical_loss": 3.7651833520863396, + "tokens_seen": 726663168 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.48516714572906494, + "objective/train/docs_used": 419961, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.715956211090088, + "objective/train/original_loss": 3.715956211090088, + "objective/train/theoretical_loss": 3.7647726406323665, + "objective/train/tokens_used": 747909600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24163493514060974, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049744963645935, + "objective/train/weighted_lm_loss": 3.8999452590942383, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9520505666732788, + "theoretical_loss": 3.7647726406323665, + "tokens_seen": 727449600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003738176314793795, + "loss": 3.6154, + "theoretical_loss": 3.7646358630930385, + "tokens_seen": 727711744 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037362845251608025, + "loss": 3.5866, + "theoretical_loss": 3.7640893829476445, + "tokens_seen": 728760320 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037343927355278095, + "loss": 3.6157, + "theoretical_loss": 3.7635439083435998, + "tokens_seen": 729808896 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.47344446182250977, + "objective/train/docs_used": 421797, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.6491940021514893, + "objective/train/original_loss": 3.6491942405700684, + "objective/train/theoretical_loss": 3.7630674403356625, + "objective/train/tokens_used": 751186400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23705269396305084, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485491752624512, + "objective/train/weighted_lm_loss": 3.8254892826080322, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9523929953575134, + "theoretical_loss": 3.7630674403356625, + "tokens_seen": 730726400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003732500945894817, + "loss": 3.6271, + "theoretical_loss": 3.762999435989914, + "tokens_seen": 730857472 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003730609156261824, + "loss": 3.4846, + "theoretical_loss": 3.76245596261107, + "tokens_seen": 731906048 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003728717366628831, + "loss": 3.5969, + "theoretical_loss": 3.7619134849469296, + "tokens_seen": 732954624 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.47209614515304565, + "objective/train/docs_used": 423825, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4763729572296143, + "objective/train/original_loss": 3.4763731956481934, + "objective/train/theoretical_loss": 3.7613719997526367, + "objective/train/tokens_used": 754463200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23541226983070374, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484060049057007, + "objective/train/weighted_lm_loss": 3.644237756729126, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9522672295570374, + "theoretical_loss": 3.7613719997526367, + "tokens_seen": 734003200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037268255769958383, + "loss": 3.6206, + "theoretical_loss": 3.7613719997526367, + "tokens_seen": 734003200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003724933787362845, + "loss": 3.6467, + "theoretical_loss": 3.760831503798527, + "tokens_seen": 735051776 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003723041997729852, + "loss": 3.6167, + "theoretical_loss": 3.760291993870034, + "tokens_seen": 736100352 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037211502080968596, + "loss": 3.5772, + "theoretical_loss": 3.759753466767597, + "tokens_seen": 737148928 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.48783084750175476, + "objective/train/docs_used": 425209, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.385236978530884, + "objective/train/original_loss": 3.3852367401123047, + "objective/train/theoretical_loss": 3.7596862198259773, + "objective/train/tokens_used": 757740000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24238182604312897, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500152111053467, + "objective/train/weighted_lm_loss": 3.554072380065918, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.951784074306488, + "theoretical_loss": 3.7596862198259773, + "tokens_seen": 737280000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037192584184638666, + "loss": 3.5612, + "theoretical_loss": 3.7592159193065697, + "tokens_seen": 738197504 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003717366628830874, + "loss": 3.6211, + "theoretical_loss": 3.758679348317131, + "tokens_seen": 739246080 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037154748391978815, + "loss": 3.6074, + "theoretical_loss": 3.7581437506441926, + "tokens_seen": 740294656 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.4911670982837677, + "objective/train/docs_used": 428014, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.293811559677124, + "objective/train/original_loss": 3.293811559677124, + "objective/train/theoretical_loss": 3.75801000293832, + "objective/train/tokens_used": 761016800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2433631718158722, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050353765487671, + "objective/train/weighted_lm_loss": 3.4594292640686035, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9623913764953613, + "theoretical_loss": 3.75801000293832, + "tokens_seen": 740556800 + }, + { + "epoch": 0.26, + "learning_rate": 0.00037135830495648884, + "loss": 3.6303, + "theoretical_loss": 3.7576091231473114, + "tokens_seen": 741343232 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003711691259931896, + "loss": 3.5686, + "theoretical_loss": 3.7570754627006018, + "tokens_seen": 742391808 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003709799470298903, + "loss": 3.5571, + "theoretical_loss": 3.756542766192646, + "tokens_seen": 743440384 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.4925093352794647, + "objective/train/docs_used": 429315, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3616669178009033, + "objective/train/original_loss": 3.3616676330566406, + "objective/train/theoretical_loss": 3.756343252885055, + "objective/train/tokens_used": 764293600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24342967569828033, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504884719848633, + "objective/train/weighted_lm_loss": 3.531308650970459, + "objective/train/weights_max": 1.0512197017669678, + "objective/train/weights_min": 0.9594744443893433, + "theoretical_loss": 3.756343252885055, + "tokens_seen": 743833600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00037079076806659103, + "loss": 3.5356, + "theoretical_loss": 3.7560110305264054, + "tokens_seen": 744488960 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003706015891032917, + "loss": 3.5672, + "theoretical_loss": 3.7554802526191393, + "tokens_seen": 745537536 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003704124101399924, + "loss": 3.4796, + "theoretical_loss": 3.7549504294023137, + "tokens_seen": 746586112 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.49480339884757996, + "objective/train/docs_used": 431156, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.175428628921509, + "objective/train/original_loss": 3.175428867340088, + "objective/train/theoretical_loss": 3.7546858748477634, + "objective/train/tokens_used": 767570400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2462376207113266, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050732135772705, + "objective/train/weighted_lm_loss": 3.3367059230804443, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9542807936668396, + "theoretical_loss": 3.7546858748477634, + "tokens_seen": 747110400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00037022323117669317, + "loss": 3.5415, + "theoretical_loss": 3.7544215578215177, + "tokens_seen": 747634688 + }, + { + "epoch": 0.27, + "learning_rate": 0.00037003405221339386, + "loss": 3.5867, + "theoretical_loss": 3.75389363483638, + "tokens_seen": 748683264 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003698448732500946, + "loss": 3.4893, + "theoretical_loss": 3.753366657420483, + "tokens_seen": 749731840 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.48980623483657837, + "objective/train/docs_used": 433094, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1637654304504395, + "objective/train/original_loss": 3.1637656688690186, + "objective/train/theoretical_loss": 3.7530377753682695, + "objective/train/tokens_used": 770847200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2416664958000183, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502090454101562, + "objective/train/weighted_lm_loss": 3.3235599994659424, + "objective/train/weights_max": 1.0512202978134155, + "objective/train/weights_min": 0.95468670129776, + "theoretical_loss": 3.7530377753682695, + "tokens_seen": 750387200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003696556942867953, + "loss": 3.555, + "theoretical_loss": 3.75284062256128, + "tokens_seen": 750780416 + }, + { + "epoch": 0.27, + "learning_rate": 0.000369466515323496, + "loss": 3.5191, + "theoretical_loss": 3.7523155272600137, + "tokens_seen": 751828992 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003692773363601968, + "loss": 3.6015, + "theoretical_loss": 3.751791368531631, + "tokens_seen": 752877568 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.48759615421295166, + "objective/train/docs_used": 434283, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.427870988845825, + "objective/train/original_loss": 3.4278712272644043, + "objective/train/theoretical_loss": 3.7513988623232883, + "objective/train/tokens_used": 774124000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2424042820930481, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499919652938843, + "objective/train/weighted_lm_loss": 3.5984747409820557, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9514016509056091, + "theoretical_loss": 3.7513988623232883, + "tokens_seen": 753664000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003690881573968975, + "loss": 3.6279, + "theoretical_loss": 3.7512681434047033, + "tokens_seen": 753926144 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003688989784335982, + "loss": 3.6342, + "theoretical_loss": 3.7507458489213477, + "tokens_seen": 754974720 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036870979947029893, + "loss": 3.5992, + "theoretical_loss": 3.7502244821371407, + "tokens_seen": 756023296 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.486615926027298, + "objective/train/docs_used": 435567, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.396214723587036, + "objective/train/original_loss": 3.396214485168457, + "objective/train/theoretical_loss": 3.7497690448996552, + "objective/train/tokens_used": 777400800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24091613292694092, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498863458633423, + "objective/train/weighted_lm_loss": 3.564488410949707, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9564319849014282, + "theoretical_loss": 3.7497690448996552, + "tokens_seen": 756940800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003685206205069996, + "loss": 3.5721, + "theoretical_loss": 3.7497040401210446, + "tokens_seen": 757071872 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003683314415437004, + "loss": 3.6079, + "theoretical_loss": 3.7491845199553238, + "tokens_seen": 758120448 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036814226258040107, + "loss": 3.6028, + "theoretical_loss": 3.748665918735468, + "tokens_seen": 759169024 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.4809083640575409, + "objective/train/docs_used": 437507, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2098705768585205, + "objective/train/original_loss": 3.2098708152770996, + "objective/train/theoretical_loss": 3.748148233570115, + "objective/train/tokens_used": 780677600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23865434527397156, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493038892745972, + "objective/train/weighted_lm_loss": 3.3677401542663574, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9517086148262024, + "theoretical_loss": 3.748148233570115, + "tokens_seen": 760217600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036795308361710176, + "loss": 3.5545, + "theoretical_loss": 3.748148233570115, + "tokens_seen": 760217600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003677639046538025, + "loss": 3.6647, + "theoretical_loss": 3.74763146158097, + "tokens_seen": 761266176 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003675747256905032, + "loss": 3.6744, + "theoretical_loss": 3.747115599902733, + "tokens_seen": 762314752 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036738554672720395, + "loss": 3.5857, + "theoretical_loss": 3.746600645683017, + "tokens_seen": 763363328 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.4819650948047638, + "objective/train/docs_used": 439068, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3648343086242676, + "objective/train/original_loss": 3.364834785461426, + "objective/train/theoretical_loss": 3.7465363400696683, + "objective/train/tokens_used": 783954400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23789268732070923, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494056940078735, + "objective/train/weighted_lm_loss": 3.5312013626098633, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9530982971191406, + "theoretical_loss": 3.7465363400696683, + "tokens_seen": 763494400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036719636776390464, + "loss": 3.6457, + "theoretical_loss": 3.7460865960822782, + "tokens_seen": 764411904 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003670071888006054, + "loss": 3.6536, + "theoretical_loss": 3.745573448273736, + "tokens_seen": 765460480 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036681800983730614, + "loss": 3.623, + "theoretical_loss": 3.7450611994433, + "tokens_seen": 766509056 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.4673716723918915, + "objective/train/docs_used": 441051, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.6296682357788086, + "objective/train/original_loss": 3.6296682357788086, + "objective/train/theoretical_loss": 3.7449332773724455, + "objective/train/tokens_used": 787231200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22804482281208038, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.047896146774292, + "objective/train/weighted_lm_loss": 3.8018686771392822, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9860634207725525, + "theoretical_loss": 3.7449332773724455, + "tokens_seen": 766771200 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036662883087400683, + "loss": 3.6236, + "theoretical_loss": 3.7445498467894947, + "tokens_seen": 767557632 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003664396519107075, + "loss": 3.6475, + "theoretical_loss": 3.7440393875233893, + "tokens_seen": 768606208 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036625047294740827, + "loss": 3.5919, + "theoretical_loss": 3.7435298188685184, + "tokens_seen": 769654784 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.4872644245624542, + "objective/train/docs_used": 442749, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4281868934631348, + "objective/train/original_loss": 3.4281868934631348, + "objective/train/theoretical_loss": 3.7433389596691073, + "objective/train/tokens_used": 790508000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24073950946331024, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499500036239624, + "objective/train/weighted_lm_loss": 3.5999956130981445, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9553505778312683, + "theoretical_loss": 3.7433389596691073, + "tokens_seen": 770048000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036606129398410897, + "loss": 3.6758, + "theoretical_loss": 3.7430211380608167, + "tokens_seen": 770703360 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003658721150208097, + "loss": 3.617, + "theoretical_loss": 3.74251334234854, + "tokens_seen": 771751936 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003656829360575104, + "loss": 3.6863, + "theoretical_loss": 3.742006428992198, + "tokens_seen": 772800512 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.46843963861465454, + "objective/train/docs_used": 444718, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.53401780128479, + "objective/train/original_loss": 3.534018039703369, + "objective/train/theoretical_loss": 3.7417533023447445, + "objective/train/tokens_used": 793784800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23737366497516632, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0480501651763916, + "objective/train/weighted_lm_loss": 3.7014524936676025, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9519520401954651, + "theoretical_loss": 3.7417533023447445, + "tokens_seen": 773324800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003654937570942111, + "loss": 3.4944, + "theoretical_loss": 3.741500395264481, + "tokens_seen": 773849088 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036530457813091185, + "loss": 3.5637, + "theoretical_loss": 3.7409952384501923, + "tokens_seen": 774897664 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036511539916761254, + "loss": 3.651, + "theoretical_loss": 3.740490955846173, + "tokens_seen": 775946240 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.48415154218673706, + "objective/train/docs_used": 446660, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4447832107543945, + "objective/train/original_loss": 3.4447832107543945, + "objective/train/theoretical_loss": 3.7401762219572765, + "objective/train/tokens_used": 797061600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2410360723733902, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049640417098999, + "objective/train/weighted_lm_loss": 3.616288661956787, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9529085755348206, + "theoretical_loss": 3.7401762219572765, + "tokens_seen": 776601600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003649262202043133, + "loss": 3.6152, + "theoretical_loss": 3.739987544761238, + "tokens_seen": 776994816 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036473704124101404, + "loss": 3.6166, + "theoretical_loss": 3.7394850025161026, + "tokens_seen": 778043392 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036454786227771473, + "loss": 3.6012, + "theoretical_loss": 3.738983326443316, + "tokens_seen": 779091968 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.4765093922615051, + "objective/train/docs_used": 448564, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2798614501953125, + "objective/train/original_loss": 3.2798619270324707, + "objective/train/theoretical_loss": 3.73860763621633, + "objective/train/tokens_used": 800338400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23574241995811462, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048849105834961, + "objective/train/weighted_lm_loss": 3.4406769275665283, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9636579751968384, + "theoretical_loss": 3.73860763621633, + "tokens_seen": 779878400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003643586833144155, + "loss": 3.6718, + "theoretical_loss": 3.7384825138871944, + "tokens_seen": 780140544 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036416950435111617, + "loss": 3.5764, + "theoretical_loss": 3.737982562203749, + "tokens_seen": 781189120 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036398032538781686, + "loss": 3.5433, + "theoretical_loss": 3.737483468760624, + "tokens_seen": 782237696 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.48366644978523254, + "objective/train/docs_used": 450552, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.120129108428955, + "objective/train/original_loss": 3.120129108428955, + "objective/train/theoretical_loss": 3.737047463962579, + "objective/train/tokens_used": 803615200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23800888657569885, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495764017105103, + "objective/train/weighted_lm_loss": 3.2741003036499023, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9536738991737366, + "theoretical_loss": 3.737047463962579, + "tokens_seen": 783155200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003637911464245176, + "loss": 3.5818, + "theoretical_loss": 3.7369852309370275, + "tokens_seen": 783286272 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003636019674612183, + "loss": 3.5907, + "theoretical_loss": 3.736487846123663, + "tokens_seen": 784334848 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036341278849791905, + "loss": 3.5826, + "theoretical_loss": 3.7359913117226684, + "tokens_seen": 785383424 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.4790653884410858, + "objective/train/docs_used": 452240, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.210500955581665, + "objective/train/original_loss": 3.210501194000244, + "objective/train/theoretical_loss": 3.735495625147548, + "objective/train/tokens_used": 806892000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23576776683330536, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491048097610474, + "objective/train/weighted_lm_loss": 3.3677072525024414, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9538201689720154, + "theoretical_loss": 3.735495625147548, + "tokens_seen": 786432000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036322360953461975, + "loss": 3.6123, + "theoretical_loss": 3.735495625147548, + "tokens_seen": 786432000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036303443057132044, + "loss": 3.5839, + "theoretical_loss": 3.735000783823107, + "tokens_seen": 787480576 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003628452516080212, + "loss": 3.5651, + "theoretical_loss": 3.7345067851853897, + "tokens_seen": 788529152 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003626560726447219, + "loss": 3.5895, + "theoretical_loss": 3.7340136266816133, + "tokens_seen": 789577728 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.4898260533809662, + "objective/train/docs_used": 454142, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3126230239868164, + "objective/train/original_loss": 3.3126230239868164, + "objective/train/theoretical_loss": 3.7339520408138513, + "objective/train/tokens_used": 810168800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2416979968547821, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502111911773682, + "objective/train/weighted_lm_loss": 3.478757619857788, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9954109191894531, + "theoretical_loss": 3.7339520408138513, + "tokens_seen": 789708800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003624668936814227, + "loss": 3.569, + "theoretical_loss": 3.733521305770105, + "tokens_seen": 790626304 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003622777147181234, + "loss": 3.5766, + "theoretical_loss": 3.7330298199202394, + "tokens_seen": 791674880 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036208853575482407, + "loss": 3.5995, + "theoretical_loss": 3.7325391666123764, + "tokens_seen": 792723456 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.4768815040588379, + "objective/train/docs_used": 455856, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8263907432556152, + "objective/train/original_loss": 2.8263907432556152, + "objective/train/theoretical_loss": 3.732416633075869, + "objective/train/tokens_used": 813445600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2372640073299408, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488940477371216, + "objective/train/weighted_lm_loss": 2.9625203609466553, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9526212215423584, + "theoretical_loss": 3.732416633075869, + "tokens_seen": 792985600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003618993567915248, + "loss": 3.558, + "theoretical_loss": 3.7320493433377973, + "tokens_seen": 793772032 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003617101778282255, + "loss": 3.6053, + "theoretical_loss": 3.731560347598646, + "tokens_seen": 794820608 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003615209988649262, + "loss": 3.5659, + "theoretical_loss": 3.7310721769078636, + "tokens_seen": 795869184 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.48475295305252075, + "objective/train/docs_used": 457654, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2081458568573, + "objective/train/original_loss": 3.2081456184387207, + "objective/train/theoretical_loss": 3.7308893251008413, + "objective/train/tokens_used": 816722400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24060019850730896, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496982336044312, + "objective/train/weighted_lm_loss": 3.367356061935425, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9561263918876648, + "theoretical_loss": 3.7308893251008413, + "tokens_seen": 796262400 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036133181990162695, + "loss": 3.5876, + "theoretical_loss": 3.730584828789132, + "tokens_seen": 796917760 + }, + { + "epoch": 0.29, + "learning_rate": 0.00036114264093832765, + "loss": 3.543, + "theoretical_loss": 3.7300983007768105, + "tokens_seen": 797966336 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003609534619750284, + "loss": 3.5454, + "theoretical_loss": 3.729612590415876, + "tokens_seen": 799014912 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.4876490533351898, + "objective/train/docs_used": 459747, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4556379318237305, + "objective/train/original_loss": 3.4556384086608887, + "objective/train/theoretical_loss": 3.729370041090373, + "objective/train/tokens_used": 819999200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24297496676445007, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500001907348633, + "objective/train/weighted_lm_loss": 3.628729820251465, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9550496935844421, + "theoretical_loss": 3.729370041090373, + "tokens_seen": 799539200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003607642830117291, + "loss": 3.525, + "theoretical_loss": 3.7291276952618655, + "tokens_seen": 800063488 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003605751040484298, + "loss": 3.5934, + "theoretical_loss": 3.7286436128808145, + "tokens_seen": 801112064 + }, + { + "epoch": 0.29, + "learning_rate": 0.00036038592508513053, + "loss": 3.5227, + "theoretical_loss": 3.7281603408491995, + "tokens_seen": 802160640 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.4763462245464325, + "objective/train/docs_used": 461200, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.19844388961792, + "objective/train/original_loss": 3.19844388961792, + "objective/train/theoretical_loss": 3.727858706262338, + "objective/train/tokens_used": 823276000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2413131445646286, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048860788345337, + "objective/train/weighted_lm_loss": 3.3524065017700195, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9517171382904053, + "theoretical_loss": 3.727858706262338, + "tokens_seen": 802816000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003601967461218313, + "loss": 3.5443, + "theoretical_loss": 3.72767787675388, + "tokens_seen": 803209216 + }, + { + "epoch": 0.29, + "learning_rate": 0.000360007567158532, + "loss": 3.5661, + "theoretical_loss": 3.727196218192039, + "tokens_seen": 804257792 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003598183881952327, + "loss": 3.4753, + "theoretical_loss": 3.7267153627711256, + "tokens_seen": 805306368 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.4879932701587677, + "objective/train/docs_used": 463166, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.404329299926758, + "objective/train/original_loss": 3.404329299926758, + "objective/train/theoretical_loss": 3.7263552468331667, + "objective/train/tokens_used": 826552800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24394917488098145, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050039291381836, + "objective/train/weighted_lm_loss": 3.575188636779785, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9521428942680359, + "theoretical_loss": 3.7263552468331667, + "tokens_seen": 806092800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003596292092319334, + "loss": 3.58, + "theoretical_loss": 3.7262353081088015, + "tokens_seen": 806354944 + }, + { + "epoch": 0.29, + "learning_rate": 0.00035944003026863416, + "loss": 3.4629, + "theoretical_loss": 3.725756051832878, + "tokens_seen": 807403520 + }, + { + "epoch": 0.29, + "learning_rate": 0.00035925085130533485, + "loss": 3.5236, + "theoretical_loss": 3.7252775915812655, + "tokens_seen": 808452096 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.47361043095588684, + "objective/train/docs_used": 465044, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0562472343444824, + "objective/train/original_loss": 3.0562477111816406, + "objective/train/theoretical_loss": 3.724859590000527, + "objective/train/tokens_used": 829829600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2366064488887787, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485633611679077, + "objective/train/weighted_lm_loss": 3.204601287841797, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9514092803001404, + "theoretical_loss": 3.724859590000527, + "tokens_seen": 809369600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003590616723420356, + "loss": 3.551, + "theoretical_loss": 3.724799925001913, + "tokens_seen": 809500672 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003588724933787363, + "loss": 3.5098, + "theoretical_loss": 3.7243230497527553, + "tokens_seen": 810549248 + }, + { + "epoch": 0.29, + "learning_rate": 0.000358683314415437, + "loss": 3.4784, + "theoretical_loss": 3.723846963501657, + "tokens_seen": 811597824 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.4856931269168854, + "objective/train/docs_used": 466971, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.61726450920105, + "objective/train/original_loss": 3.617264986038208, + "objective/train/theoretical_loss": 3.7233716639263568, + "objective/train/tokens_used": 833106400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2407991737127304, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497934818267822, + "objective/train/weighted_lm_loss": 3.7969565391540527, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9524025917053223, + "theoretical_loss": 3.7233716639263568, + "tokens_seen": 812646400 + }, + { + "epoch": 0.29, + "learning_rate": 0.00035849413545213773, + "loss": 3.4899, + "theoretical_loss": 3.7233716639263568, + "tokens_seen": 812646400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003583049564888384, + "loss": 3.5225, + "theoretical_loss": 3.7228971487144147, + "tokens_seen": 813694976 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003581157775255391, + "loss": 3.5211, + "theoretical_loss": 3.722423415563156, + "tokens_seen": 814743552 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003579265985622399, + "loss": 3.5231, + "theoretical_loss": 3.7219504621796187, + "tokens_seen": 815792128 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.47521209716796875, + "objective/train/docs_used": 469033, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4922797679901123, + "objective/train/original_loss": 3.4922800064086914, + "objective/train/theoretical_loss": 3.7218913977202703, + "objective/train/tokens_used": 836383200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23987308144569397, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048740267753601, + "objective/train/weighted_lm_loss": 3.6617021560668945, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9514144659042358, + "theoretical_loss": 3.7218913977202703, + "tokens_seen": 815923200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003577374195989406, + "loss": 3.4287, + "theoretical_loss": 3.7214782862805, + "tokens_seen": 816840704 + }, + { + "epoch": 0.29, + "learning_rate": 0.00035754824063564136, + "loss": 3.4614, + "theoretical_loss": 3.7210068855921024, + "tokens_seen": 817889280 + }, + { + "epoch": 0.29, + "learning_rate": 0.00035735906167234206, + "loss": 3.4941, + "theoretical_loss": 3.7205362578502834, + "tokens_seen": 818937856 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.49092575907707214, + "objective/train/docs_used": 471128, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.248918056488037, + "objective/train/original_loss": 3.248918056488037, + "objective/train/theoretical_loss": 3.7204187214233073, + "objective/train/tokens_used": 839660000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24439458549022675, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503349304199219, + "objective/train/weighted_lm_loss": 3.412680149078369, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9527466297149658, + "theoretical_loss": 3.7204187214233073, + "tokens_seen": 819200000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00035716988270904275, + "loss": 3.4172, + "theoretical_loss": 3.7200664008004, + "tokens_seen": 819986432 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003569807037457435, + "loss": 3.4968, + "theoretical_loss": 3.7195973121972585, + "tokens_seen": 821035008 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003567915247824442, + "loss": 3.5291, + "theoretical_loss": 3.7191289898050632, + "tokens_seen": 822083584 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.4838717579841614, + "objective/train/docs_used": 472950, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.253770589828491, + "objective/train/original_loss": 3.2537708282470703, + "objective/train/theoretical_loss": 3.718953565992031, + "objective/train/tokens_used": 842936800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2399124652147293, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496065616607666, + "objective/train/weighted_lm_loss": 3.415417194366455, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9515462517738342, + "theoretical_loss": 3.718953565992031, + "tokens_seen": 822476800 + }, + { + "epoch": 0.29, + "learning_rate": 0.00035660234581914494, + "loss": 3.4414, + "theoretical_loss": 3.7186614313973645, + "tokens_seen": 823132160 + }, + { + "epoch": 0.29, + "learning_rate": 0.00035641316685584563, + "loss": 3.4772, + "theoretical_loss": 3.7181946347570074, + "tokens_seen": 824180736 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003562239878925463, + "loss": 3.4118, + "theoretical_loss": 3.7177285976760834, + "tokens_seen": 825229312 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.4853784739971161, + "objective/train/docs_used": 475156, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4621541500091553, + "objective/train/original_loss": 3.4621541500091553, + "objective/train/theoretical_loss": 3.7174958632829522, + "objective/train/tokens_used": 846213600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24138674139976501, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04976487159729, + "objective/train/weighted_lm_loss": 3.634368658065796, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9528544545173645, + "theoretical_loss": 3.7174958632829522, + "tokens_seen": 825753600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003560348089292471, + "loss": 3.5352, + "theoretical_loss": 3.7172633179558763, + "tokens_seen": 826277888 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035584562996594777, + "loss": 3.4583, + "theoretical_loss": 3.7167987934068156, + "tokens_seen": 827326464 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003556564510026485, + "loss": 3.4423, + "theoretical_loss": 3.7163350218484252, + "tokens_seen": 828375040 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.48797664046287537, + "objective/train/docs_used": 477026, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.330416202545166, + "objective/train/original_loss": 3.330416679382324, + "objective/train/theoretical_loss": 3.716045546037287, + "objective/train/tokens_used": 849490400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24159426987171173, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500258207321167, + "objective/train/weighted_lm_loss": 3.4970216751098633, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9596421718597412, + "theoretical_loss": 3.716045546037287, + "tokens_seen": 829030400 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035546727203934926, + "loss": 3.4401, + "theoretical_loss": 3.7158720011092767, + "tokens_seen": 829423616 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035527809307604996, + "loss": 3.3848, + "theoretical_loss": 3.715409729026936, + "tokens_seen": 830472192 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003550889141127507, + "loss": 3.4305, + "theoretical_loss": 3.714948203447919, + "tokens_seen": 831520768 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.48684290051460266, + "objective/train/docs_used": 478220, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.354376792907715, + "objective/train/original_loss": 3.3543763160705566, + "objective/train/theoretical_loss": 3.7146025478660274, + "objective/train/tokens_used": 852767200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24331338703632355, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499210357666016, + "objective/train/weighted_lm_loss": 3.5209054946899414, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9515804052352905, + "theoretical_loss": 3.7146025478660274, + "tokens_seen": 832307200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003548997351494514, + "loss": 3.5074, + "theoretical_loss": 3.7144874222276405, + "tokens_seen": 832569344 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003547105561861521, + "loss": 3.4283, + "theoretical_loss": 3.714027383230369, + "tokens_seen": 833617920 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035452137722285284, + "loss": 3.4665, + "theoretical_loss": 3.713568084329175, + "tokens_seen": 834666496 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.4813803732395172, + "objective/train/docs_used": 480064, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.389320135116577, + "objective/train/original_loss": 3.3893203735351562, + "objective/train/theoretical_loss": 3.713166803235318, + "objective/train/tokens_used": 856044000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24221307039260864, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049368977546692, + "objective/train/weighted_lm_loss": 3.5553438663482666, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9520329833030701, + "theoretical_loss": 3.713166803235318, + "tokens_seen": 835584000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035433219825955353, + "loss": 3.4964, + "theoretical_loss": 3.7131095234058895, + "tokens_seen": 835715072 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003541430192962543, + "loss": 3.4202, + "theoretical_loss": 3.712651698351051, + "tokens_seen": 836763648 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035395384033295497, + "loss": 3.4965, + "theoretical_loss": 3.7121946070638625, + "tokens_seen": 837812224 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.4783816933631897, + "objective/train/docs_used": 482017, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7325849533081055, + "objective/train/original_loss": 3.7325844764709473, + "objective/train/theoretical_loss": 3.7117382474521436, + "objective/train/tokens_used": 859320800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23481260240077972, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490317344665527, + "objective/train/weighted_lm_loss": 3.9163506031036377, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9517280459403992, + "theoretical_loss": 3.7117382474521436, + "tokens_seen": 838860800 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035376466136965567, + "loss": 3.4554, + "theoretical_loss": 3.7117382474521436, + "tokens_seen": 838860800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003535754824063564, + "loss": 3.4283, + "theoretical_loss": 3.7112826174322864, + "tokens_seen": 839909376 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035338630344305716, + "loss": 3.4463, + "theoretical_loss": 3.7108277149292066, + "tokens_seen": 840957952 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035319712447975785, + "loss": 3.44, + "theoretical_loss": 3.7103735378763014, + "tokens_seen": 842006528 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.49178850650787354, + "objective/train/docs_used": 484166, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.338574171066284, + "objective/train/original_loss": 3.338573694229126, + "objective/train/theoretical_loss": 3.7103168166503053, + "objective/train/tokens_used": 862597600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24451500177383423, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504218339920044, + "objective/train/weighted_lm_loss": 3.5066874027252197, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.951617419719696, + "theoretical_loss": 3.7103168166503053, + "tokens_seen": 842137600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003530079455164586, + "loss": 3.4913, + "theoretical_loss": 3.7099200842154003, + "tokens_seen": 843055104 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003528187665531593, + "loss": 3.4763, + "theoretical_loss": 3.709467351896726, + "tokens_seen": 844103680 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035262958758986004, + "loss": 3.4748, + "theoretical_loss": 3.709015338878843, + "tokens_seen": 845152256 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.47579312324523926, + "objective/train/docs_used": 485899, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.143003225326538, + "objective/train/original_loss": 3.143002986907959, + "objective/train/theoretical_loss": 3.7089024477766817, + "objective/train/tokens_used": 865874400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2346218377351761, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487719774246216, + "objective/train/weighted_lm_loss": 3.297304630279541, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9735310077667236, + "theoretical_loss": 3.7089024477766817, + "tokens_seen": 845414400 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035244040862656074, + "loss": 3.4219, + "theoretical_loss": 3.7085640431286198, + "tokens_seen": 846200832 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035225122966326143, + "loss": 3.4825, + "theoretical_loss": 3.7081134626211796, + "tokens_seen": 847249408 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003520620506999622, + "loss": 3.4676, + "theoretical_loss": 3.7076635953398607, + "tokens_seen": 848297984 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.4607677459716797, + "objective/train/docs_used": 487462, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2809650897979736, + "objective/train/original_loss": 3.2809653282165527, + "objective/train/theoretical_loss": 3.7074950785777787, + "objective/train/tokens_used": 869151200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22898733615875244, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0472406148910522, + "objective/train/weighted_lm_loss": 3.438236713409424, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9514792561531067, + "theoretical_loss": 3.7074950785777787, + "tokens_seen": 848691200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00035187287173666287, + "loss": 3.4275, + "theoretical_loss": 3.7072144392761697, + "tokens_seen": 849346560 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003516836927733636, + "loss": 3.393, + "theoretical_loss": 3.70676599242974, + "tokens_seen": 850395136 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003514945138100643, + "loss": 3.4647, + "theoretical_loss": 3.706318252808291, + "tokens_seen": 851443712 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.47821953892707825, + "objective/train/docs_used": 489283, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.037062644958496, + "objective/train/original_loss": 3.037062644958496, + "objective/train/theoretical_loss": 3.7060946475865455, + "objective/train/tokens_used": 872428000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23509417474269867, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490169525146484, + "objective/train/weighted_lm_loss": 3.1864781379699707, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9535816311836243, + "theoretical_loss": 3.7060946475865455, + "tokens_seen": 851968000 + }, + { + "epoch": 0.3, + "learning_rate": 0.000351305334846765, + "loss": 3.5199, + "theoretical_loss": 3.705871218427581, + "tokens_seen": 852492288 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003511161558834658, + "loss": 3.4919, + "theoretical_loss": 3.705424887311368, + "tokens_seen": 853540864 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003509269769201665, + "loss": 3.5404, + "theoretical_loss": 3.704979257491368, + "tokens_seen": 854589440 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.4794943332672119, + "objective/train/docs_used": 491311, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2601847648620605, + "objective/train/original_loss": 3.2601850032806396, + "objective/train/theoretical_loss": 3.70470109410946, + "objective/train/tokens_used": 875704800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2394295483827591, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491665601730347, + "objective/train/weighted_lm_loss": 3.4213621616363525, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9513980746269226, + "theoretical_loss": 3.70470109410946, + "tokens_seen": 855244800 + }, + { + "epoch": 0.31, + "learning_rate": 0.00035073779795686725, + "loss": 3.4952, + "theoretical_loss": 3.704534327007211, + "tokens_seen": 855638016 + }, + { + "epoch": 0.31, + "learning_rate": 0.00035054861899356794, + "loss": 3.485, + "theoretical_loss": 3.7040900939064008, + "tokens_seen": 856686592 + }, + { + "epoch": 0.31, + "learning_rate": 0.00035035944003026864, + "loss": 3.459, + "theoretical_loss": 3.7036465562442746, + "tokens_seen": 857735168 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.4735029935836792, + "objective/train/docs_used": 493471, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3861629962921143, + "objective/train/original_loss": 3.3861632347106934, + "objective/train/theoretical_loss": 3.7033143582138752, + "objective/train/tokens_used": 878981600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23431852459907532, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485413074493408, + "objective/train/weighted_lm_loss": 3.551286220550537, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.9518417119979858, + "theoretical_loss": 3.7033143582138752, + "tokens_seen": 858521600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003501702610669694, + "loss": 3.4133, + "theoretical_loss": 3.703203712083961, + "tokens_seen": 858783744 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003499810821036701, + "loss": 3.5271, + "theoretical_loss": 3.702761559496338, + "tokens_seen": 859832320 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034979190314037077, + "loss": 3.4913, + "theoretical_loss": 3.7023200965599967, + "tokens_seen": 860880896 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.48700904846191406, + "objective/train/docs_used": 495231, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3630502223968506, + "objective/train/original_loss": 3.3630504608154297, + "objective/train/theoretical_loss": 3.701934380715622, + "objective/train/tokens_used": 882258400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24430139362812042, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499428510665894, + "objective/train/weighted_lm_loss": 3.531970262527466, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9546670317649841, + "theoretical_loss": 3.701934380715622, + "tokens_seen": 861798400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003496027241770715, + "loss": 3.4939, + "theoretical_loss": 3.7018793213611954, + "tokens_seen": 861929472 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003494135452137722, + "loss": 3.4595, + "theoretical_loss": 3.7014392319938265, + "tokens_seen": 862978048 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034922436625047296, + "loss": 3.5073, + "theoretical_loss": 3.700999826559369, + "tokens_seen": 864026624 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.4687807261943817, + "objective/train/docs_used": 497313, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8510427474975586, + "objective/train/original_loss": 2.8510427474975586, + "objective/train/theoretical_loss": 3.700561103166857, + "objective/train/tokens_used": 885535200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2361258715391159, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0480778217315674, + "objective/train/weighted_lm_loss": 2.9898033142089844, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9532948732376099, + "theoretical_loss": 3.700561103166857, + "tokens_seen": 865075200 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034903518728717365, + "loss": 3.435, + "theoretical_loss": 3.700561103166857, + "tokens_seen": 865075200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003488460083238744, + "loss": 3.4834, + "theoretical_loss": 3.7001230599328334, + "tokens_seen": 866123776 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034865682936057515, + "loss": 3.4139, + "theoretical_loss": 3.6996856949813184, + "tokens_seen": 867172352 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034846765039727584, + "loss": 3.5175, + "theoretical_loss": 3.6992490064437624, + "tokens_seen": 868220928 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.4850696623325348, + "objective/train/docs_used": 499341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.129650831222534, + "objective/train/original_loss": 3.1296510696411133, + "objective/train/theoretical_loss": 3.6991944678441504, + "objective/train/tokens_used": 888812000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24183686077594757, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497361421585083, + "objective/train/weighted_lm_loss": 3.2843034267425537, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.952808678150177, + "theoretical_loss": 3.6991944678441504, + "tokens_seen": 868352000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003482784714339766, + "loss": 3.402, + "theoretical_loss": 3.6988129924590156, + "tokens_seen": 869269504 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003480892924706773, + "loss": 3.4196, + "theoretical_loss": 3.698377651173285, + "tokens_seen": 870318080 + }, + { + "epoch": 0.31, + "learning_rate": 0.000347900113507378, + "loss": 3.4628, + "theoretical_loss": 3.6979429807400965, + "tokens_seen": 871366656 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.48964646458625793, + "objective/train/docs_used": 501132, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.557471513748169, + "objective/train/original_loss": 3.557471752166748, + "objective/train/theoretical_loss": 3.69783441773682, + "objective/train/tokens_used": 892088800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24149833619594574, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501922369003296, + "objective/train/weighted_lm_loss": 3.736229419708252, + "objective/train/weights_max": 1.0512193441390991, + "objective/train/weights_min": 0.9738736152648926, + "theoretical_loss": 3.69783441773682, + "tokens_seen": 871628800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003477109345440787, + "loss": 3.5082, + "theoretical_loss": 3.6975089793202613, + "tokens_seen": 872415232 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003475217555807794, + "loss": 3.459, + "theoretical_loss": 3.697075645081833, + "tokens_seen": 873463808 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003473325766174801, + "loss": 3.4345, + "theoretical_loss": 3.6966429762000756, + "tokens_seen": 874512384 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.48227402567863464, + "objective/train/docs_used": 502873, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.550771713256836, + "objective/train/original_loss": 3.550771713256836, + "objective/train/theoretical_loss": 3.696480896535487, + "objective/train/tokens_used": 895365600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23732295632362366, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494335889816284, + "objective/train/weighted_lm_loss": 3.7260501384735107, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9638433456420898, + "theoretical_loss": 3.696480896535487, + "tokens_seen": 874905600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034714339765418086, + "loss": 3.4506, + "theoretical_loss": 3.696210970857422, + "tokens_seen": 875560960 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034695421869088155, + "loss": 3.3722, + "theoretical_loss": 3.695779627243439, + "tokens_seen": 876609536 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003467650397275823, + "loss": 3.3784, + "theoretical_loss": 3.695348943554793, + "tokens_seen": 877658112 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.48700252175331116, + "objective/train/docs_used": 504313, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2833023071289062, + "objective/train/original_loss": 3.2833027839660645, + "objective/train/theoretical_loss": 3.695133848620862, + "objective/train/tokens_used": 898642400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24063783884048462, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499235391616821, + "objective/train/weighted_lm_loss": 3.4479916095733643, + "objective/train/weights_max": 1.0512157678604126, + "objective/train/weights_min": 0.9604228734970093, + "theoretical_loss": 3.695133848620862, + "tokens_seen": 878182400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034657586076428305, + "loss": 3.3717, + "theoretical_loss": 3.6949189179952113, + "tokens_seen": 878706688 + }, + { + "epoch": 0.31, + "learning_rate": 0.00034638668180098374, + "loss": 3.4457, + "theoretical_loss": 3.6944895487754454, + "tokens_seen": 879755264 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003461975028376845, + "loss": 3.3993, + "theoretical_loss": 3.6940608341132375, + "tokens_seen": 880803840 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.4854235053062439, + "objective/train/docs_used": 506500, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4923324584960938, + "objective/train/original_loss": 3.4923322200775146, + "objective/train/theoretical_loss": 3.693793219052748, + "objective/train/tokens_used": 901919200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24003635346889496, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049762487411499, + "objective/train/weighted_lm_loss": 3.665571928024292, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9516666531562805, + "theoretical_loss": 3.693793219052748, + "tokens_seen": 881459200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003460083238743852, + "loss": 3.3099, + "theoretical_loss": 3.693632772233284, + "tokens_seen": 881852416 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034581914491108593, + "loss": 3.4134, + "theoretical_loss": 3.6932053613671982, + "tokens_seen": 882900992 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003456299659477866, + "loss": 3.4172, + "theoretical_loss": 3.6927785997534794, + "tokens_seen": 883949568 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.4926452040672302, + "objective/train/docs_used": 508372, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.025479793548584, + "objective/train/original_loss": 3.025480031967163, + "objective/train/theoretical_loss": 3.6924589535592656, + "objective/train/tokens_used": 905196000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24352741241455078, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505025386810303, + "objective/train/weighted_lm_loss": 3.178568124771118, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9983458518981934, + "theoretical_loss": 3.6924589535592656, + "tokens_seen": 884736000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003454407869844873, + "loss": 3.4296, + "theoretical_loss": 3.692352485637474, + "tokens_seen": 884998144 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034525160802118806, + "loss": 3.4518, + "theoretical_loss": 3.6919270172713414, + "tokens_seen": 886046720 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034506242905788876, + "loss": 3.4126, + "theoretical_loss": 3.6915021929140224, + "tokens_seen": 887095296 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.47590452432632446, + "objective/train/docs_used": 510118, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.510807991027832, + "objective/train/original_loss": 3.510807991027832, + "objective/train/theoretical_loss": 3.691130998526281, + "objective/train/tokens_used": 908472800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24169528484344482, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048818826675415, + "objective/train/weighted_lm_loss": 3.6790857315063477, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9513728022575378, + "theoretical_loss": 3.691130998526281, + "tokens_seen": 888012800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034487325009458945, + "loss": 3.5185, + "theoretical_loss": 3.691078010831202, + "tokens_seen": 888143872 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003446840711312902, + "loss": 3.4558, + "theoretical_loss": 3.690654469295275, + "tokens_seen": 889192448 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003444948921679909, + "loss": 3.4879, + "theoretical_loss": 3.6902315665853163, + "tokens_seen": 890241024 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.4895252287387848, + "objective/train/docs_used": 511934, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.69549822807312, + "objective/train/original_loss": 3.695497989654541, + "objective/train/theoretical_loss": 3.689809300987042, + "objective/train/tokens_used": 911749600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24316494166851044, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050188660621643, + "objective/train/weighted_lm_loss": 3.88081955909729, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9516122937202454, + "theoretical_loss": 3.689809300987042, + "tokens_seen": 891289600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003443057132046917, + "loss": 3.4757, + "theoretical_loss": 3.689809300987042, + "tokens_seen": 891289600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003441165342413924, + "loss": 3.3858, + "theoretical_loss": 3.6893876707927777, + "tokens_seen": 892338176 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003439273552780931, + "loss": 3.4611, + "theoretical_loss": 3.6889666743014295, + "tokens_seen": 893386752 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034373817631479383, + "loss": 3.3919, + "theoretical_loss": 3.6885463098184434, + "tokens_seen": 894435328 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.47695353627204895, + "objective/train/docs_used": 513625, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3136649131774902, + "objective/train/original_loss": 3.3136653900146484, + "objective/train/theoretical_loss": 3.688493808612015, + "objective/train/tokens_used": 915026400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2371138483285904, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489006042480469, + "objective/train/weighted_lm_loss": 3.475831985473633, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9526435136795044, + "theoretical_loss": 3.688493808612015, + "tokens_seen": 894566400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003435489973514945, + "loss": 3.4412, + "theoretical_loss": 3.6881265756557795, + "tokens_seen": 895483904 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034335981838819527, + "loss": 3.406, + "theoretical_loss": 3.6877074701318735, + "tokens_seen": 896532480 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034317063942489596, + "loss": 3.4235, + "theoretical_loss": 3.6872889915716107, + "tokens_seen": 897581056 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.48399674892425537, + "objective/train/docs_used": 515235, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2255749702453613, + "objective/train/original_loss": 3.2255749702453613, + "objective/train/theoretical_loss": 3.6871844696989227, + "objective/train/tokens_used": 918303200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23830629885196686, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04961097240448, + "objective/train/weighted_lm_loss": 3.38556170463562, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9623337388038635, + "theoretical_loss": 3.6871844696989227, + "tokens_seen": 897843200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034298146046159666, + "loss": 3.4544, + "theoretical_loss": 3.6868711383062873, + "tokens_seen": 898629632 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003427922814982974, + "loss": 3.4239, + "theoretical_loss": 3.686453908673583, + "tokens_seen": 899678208 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003426031025349981, + "loss": 3.4579, + "theoretical_loss": 3.6860373010175262, + "tokens_seen": 900726784 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.4840245246887207, + "objective/train/docs_used": 516962, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5026180744171143, + "objective/train/original_loss": 3.5026183128356934, + "objective/train/theoretical_loss": 3.685881233162962, + "objective/train/tokens_used": 921580000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2395327091217041, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496200323104858, + "objective/train/weighted_lm_loss": 3.6757030487060547, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9527707695960999, + "theoretical_loss": 3.685881233162962, + "tokens_seen": 901120000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003424139235716988, + "loss": 3.4299, + "theoretical_loss": 3.685621313688465, + "tokens_seen": 901775360 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034222474460839954, + "loss": 3.4742, + "theoretical_loss": 3.6852059450430343, + "tokens_seen": 902823936 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003420355656451003, + "loss": 3.4374, + "theoretical_loss": 3.6847911934441244, + "tokens_seen": 903872512 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.4911254346370697, + "objective/train/docs_used": 518997, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.090237617492676, + "objective/train/original_loss": 3.0902373790740967, + "objective/train/theoretical_loss": 3.6845840485272205, + "objective/train/tokens_used": 924856800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24378250539302826, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503517389297485, + "objective/train/weighted_lm_loss": 3.2452797889709473, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9669274687767029, + "theoretical_loss": 3.6845840485272205, + "tokens_seen": 904396800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034184638668180103, + "loss": 3.4429, + "theoretical_loss": 3.6843770572608507, + "tokens_seen": 904921088 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003416572077185017, + "loss": 3.4243, + "theoretical_loss": 3.6839635348685222, + "tokens_seen": 905969664 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003414680287552024, + "loss": 3.4806, + "theoretical_loss": 3.6835506246486105, + "tokens_seen": 907018240 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.48305195569992065, + "objective/train/docs_used": 520740, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1138951778411865, + "objective/train/original_loss": 3.1138947010040283, + "objective/train/theoretical_loss": 3.6832928659132724, + "objective/train/tokens_used": 928133600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23734770715236664, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495115518569946, + "objective/train/weighted_lm_loss": 3.2679831981658936, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9549471735954285, + "theoretical_loss": 3.6832928659132724, + "tokens_seen": 907673600 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034127884979190317, + "loss": 3.4425, + "theoretical_loss": 3.6831383249887226, + "tokens_seen": 908066816 + }, + { + "epoch": 0.32, + "learning_rate": 0.00034108967082860386, + "loss": 3.4278, + "theoretical_loss": 3.682726634282564, + "tokens_seen": 909115392 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003409004918653046, + "loss": 3.4633, + "theoretical_loss": 3.682315550929917, + "tokens_seen": 910163968 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.4881671071052551, + "objective/train/docs_used": 522796, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8828225135803223, + "objective/train/original_loss": 2.8828227519989014, + "objective/train/theoretical_loss": 3.6820076360319485, + "objective/train/tokens_used": 931410400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2397347241640091, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500353574752808, + "objective/train/weighted_lm_loss": 3.027292251586914, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9536739587783813, + "theoretical_loss": 3.6820076360319485, + "tokens_seen": 910950400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003407113129020053, + "loss": 3.4252, + "theoretical_loss": 3.6819050733366017, + "tokens_seen": 911212544 + }, + { + "epoch": 0.33, + "learning_rate": 0.000340522133938706, + "loss": 3.4594, + "theoretical_loss": 3.6814951999144547, + "tokens_seen": 912261120 + }, + { + "epoch": 0.33, + "learning_rate": 0.00034033295497540674, + "loss": 3.4095, + "theoretical_loss": 3.681085929081294, + "tokens_seen": 913309696 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.4804491102695465, + "objective/train/docs_used": 524442, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4967141151428223, + "objective/train/original_loss": 3.4967143535614014, + "objective/train/theoretical_loss": 3.6807283101742865, + "objective/train/tokens_used": 934687200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23661978542804718, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04924738407135, + "objective/train/weighted_lm_loss": 3.6696856021881104, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9522182941436768, + "theoretical_loss": 3.6807283101742865, + "tokens_seen": 914227200 + }, + { + "epoch": 0.33, + "learning_rate": 0.00034014377601210744, + "loss": 3.4546, + "theoretical_loss": 3.680677259260892, + "tokens_seen": 914358272 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033995459704880813, + "loss": 3.4304, + "theoretical_loss": 3.6802691888829453, + "tokens_seen": 915406848 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033976541808550893, + "loss": 3.5283, + "theoretical_loss": 3.679861716383046, + "tokens_seen": 916455424 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.49545738101005554, + "objective/train/docs_used": 526356, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.091348886489868, + "objective/train/original_loss": 3.0913491249084473, + "objective/train/theoretical_loss": 3.6794548402026535, + "objective/train/tokens_used": 937964000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2462480366230011, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0507975816726685, + "objective/train/weighted_lm_loss": 3.2484302520751953, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9557216167449951, + "theoretical_loss": 3.6794548402026535, + "tokens_seen": 917504000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003395762391222096, + "loss": 3.359, + "theoretical_loss": 3.6794548402026535, + "tokens_seen": 917504000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003393870601589104, + "loss": 3.4658, + "theoretical_loss": 3.6790485587890642, + "tokens_seen": 918552576 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033919788119561107, + "loss": 3.4122, + "theoretical_loss": 3.6786428705953855, + "tokens_seen": 919601152 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033900870223231176, + "loss": 3.4613, + "theoretical_loss": 3.6782377740805043, + "tokens_seen": 920649728 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.48470258712768555, + "objective/train/docs_used": 528450, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.134005308151245, + "objective/train/original_loss": 3.1340060234069824, + "objective/train/theoretical_loss": 3.678187178542029, + "objective/train/tokens_used": 941240800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24029278755187988, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496915578842163, + "objective/train/weighted_lm_loss": 3.289982795715332, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9536044597625732, + "theoretical_loss": 3.678187178542029, + "tokens_seen": 920780800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003388195232690125, + "loss": 3.4477, + "theoretical_loss": 3.6778332677090617, + "tokens_seen": 921698304 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003386303443057132, + "loss": 3.4984, + "theoretical_loss": 3.6774293499514243, + "tokens_seen": 922746880 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033844116534241395, + "loss": 3.4305, + "theoretical_loss": 3.6770260192836544, + "tokens_seen": 923795456 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.48579150438308716, + "objective/train/docs_used": 529792, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3090968132019043, + "objective/train/original_loss": 3.3090968132019043, + "objective/train/theoretical_loss": 3.6769252781714576, + "objective/train/tokens_used": 944517600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24083971977233887, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049803376197815, + "objective/train/weighted_lm_loss": 3.472637176513672, + "objective/train/weights_max": 1.0512195825576782, + "objective/train/weights_min": 0.9520125389099121, + "theoretical_loss": 3.6769252781714576, + "tokens_seen": 924057600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033825198637911464, + "loss": 3.3764, + "theoretical_loss": 3.6766232741874845, + "tokens_seen": 924844032 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033806280741581534, + "loss": 3.4332, + "theoretical_loss": 3.67622111315029, + "tokens_seen": 925892608 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003378736284525161, + "loss": 3.3611, + "theoretical_loss": 3.6758195346650595, + "tokens_seen": 926941184 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.4928480386734009, + "objective/train/docs_used": 531599, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.557663679122925, + "objective/train/original_loss": 3.557663679122925, + "objective/train/theoretical_loss": 3.6756690926156557, + "objective/train/tokens_used": 947794400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2444053441286087, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505273342132568, + "objective/train/weighted_lm_loss": 3.7376136779785156, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9553889632225037, + "theoretical_loss": 3.6756690926156557, + "tokens_seen": 927334400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003376844494892168, + "loss": 3.275, + "theoretical_loss": 3.6754185372303705, + "tokens_seen": 927989760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003374952705259176, + "loss": 3.4063, + "theoretical_loss": 3.6750181193503604, + "tokens_seen": 929038336 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033730609156261827, + "loss": 3.2966, + "theoretical_loss": 3.6746182795347013, + "tokens_seen": 930086912 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.48329272866249084, + "objective/train/docs_used": 533595, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.309666156768799, + "objective/train/original_loss": 3.309666156768799, + "objective/train/theoretical_loss": 3.674418575936782, + "objective/train/tokens_used": 951071200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23755574226379395, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495368242263794, + "objective/train/weighted_lm_loss": 3.4753262996673584, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9564974308013916, + "theoretical_loss": 3.674418575936782, + "tokens_seen": 930611200 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033711691259931897, + "loss": 3.3508, + "theoretical_loss": 3.674219016298571, + "tokens_seen": 931135488 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003369277336360197, + "loss": 3.351, + "theoretical_loss": 3.673820328162628, + "tokens_seen": 932184064 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003367385546727204, + "loss": 3.367, + "theoretical_loss": 3.673422213652986, + "tokens_seen": 933232640 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.48411282896995544, + "objective/train/docs_used": 535440, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0631942749023438, + "objective/train/original_loss": 3.063194751739502, + "objective/train/theoretical_loss": 3.6731736827263513, + "objective/train/tokens_used": 954348000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23934400081634521, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496280193328857, + "objective/train/weighted_lm_loss": 3.2147536277770996, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9552776217460632, + "theoretical_loss": 3.6731736827263513, + "tokens_seen": 933888000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003365493757094211, + "loss": 3.3911, + "theoretical_loss": 3.673024671301186, + "tokens_seen": 934281216 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033636019674612185, + "loss": 3.4291, + "theoretical_loss": 3.6726276996441705, + "tokens_seen": 935329792 + }, + { + "epoch": 0.33, + "learning_rate": 0.00033617101778282254, + "loss": 3.3124, + "theoretical_loss": 3.6722312972242594, + "tokens_seen": 936378368 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.4916326701641083, + "objective/train/docs_used": 537222, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.992642879486084, + "objective/train/original_loss": 2.992642879486084, + "objective/train/theoretical_loss": 3.6719343680973067, + "objective/train/tokens_used": 957624800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24388481676578522, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050403118133545, + "objective/train/weighted_lm_loss": 3.1425609588623047, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9709485173225403, + "theoretical_loss": 3.6719343680973067, + "tokens_seen": 937164800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003359818388195233, + "loss": 3.3708, + "theoretical_loss": 3.6718354625891205, + "tokens_seen": 937426944 + }, + { + "epoch": 0.34, + "learning_rate": 0.000335792659856224, + "loss": 3.282, + "theoretical_loss": 3.6714401942917485, + "tokens_seen": 938475520 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003356034808929247, + "loss": 3.3235, + "theoretical_loss": 3.6710454908904366, + "tokens_seen": 939524096 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.48321402072906494, + "objective/train/docs_used": 539286, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.189788818359375, + "objective/train/original_loss": 3.189789295196533, + "objective/train/theoretical_loss": 3.6707005876762313, + "objective/train/tokens_used": 960901600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2379058301448822, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495306253433228, + "objective/train/weighted_lm_loss": 3.346953868865967, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9523076415061951, + "theoretical_loss": 3.6707005876762313, + "tokens_seen": 940441600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003354143019296254, + "loss": 3.3441, + "theoretical_loss": 3.6706513509487513, + "tokens_seen": 940572672 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033522512296632617, + "loss": 3.3747, + "theoretical_loss": 3.6702577730355084, + "tokens_seen": 941621248 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003350359440030269, + "loss": 3.4368, + "theoretical_loss": 3.6698647557247472, + "tokens_seen": 942669824 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.48219361901283264, + "objective/train/docs_used": 541407, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.251582622528076, + "objective/train/original_loss": 3.251582622528076, + "objective/train/theoretical_loss": 3.6694722975957066, + "objective/train/tokens_used": 964178400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23559176921844482, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494168996810913, + "objective/train/weighted_lm_loss": 3.411501407623291, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9520928859710693, + "theoretical_loss": 3.6694722975957066, + "tokens_seen": 943718400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003348467650397276, + "loss": 3.3476, + "theoretical_loss": 3.6694722975957066, + "tokens_seen": 943718400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003346575860764283, + "loss": 3.4193, + "theoretical_loss": 3.6690803972327988, + "tokens_seen": 944766976 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033446840711312905, + "loss": 3.3699, + "theoretical_loss": 3.6686890532255862, + "tokens_seen": 945815552 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033427922814982975, + "loss": 3.3711, + "theoretical_loss": 3.6682982641687563, + "tokens_seen": 946864128 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.48947545886039734, + "objective/train/docs_used": 543040, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.059685707092285, + "objective/train/original_loss": 3.059685707092285, + "objective/train/theoretical_loss": 3.668249454486809, + "objective/train/tokens_used": 967455200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24430738389492035, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501893758773804, + "objective/train/weighted_lm_loss": 3.213547945022583, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.951458215713501, + "theoretical_loss": 3.668249454486809, + "tokens_seen": 946995200 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033409004918653044, + "loss": 3.3339, + "theoretical_loss": 3.6679080286620973, + "tokens_seen": 947912704 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003339008702232312, + "loss": 3.3752, + "theoretical_loss": 3.6675183453104747, + "tokens_seen": 948961280 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003337116912599319, + "loss": 3.4667, + "theoretical_loss": 3.6671292127238067, + "tokens_seen": 950009856 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.4804360866546631, + "objective/train/docs_used": 545196, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2226321697235107, + "objective/train/original_loss": 3.2226319313049316, + "objective/train/theoretical_loss": 3.667032015471743, + "objective/train/tokens_used": 970732000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24167245626449585, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492719411849976, + "objective/train/weighted_lm_loss": 3.3797309398651123, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9517346620559692, + "theoretical_loss": 3.667032015471743, + "tokens_seen": 950272000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033352251229663263, + "loss": 3.4203, + "theoretical_loss": 3.66674062951704, + "tokens_seen": 951058432 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003333333333333333, + "loss": 3.335, + "theoretical_loss": 3.666352594310127, + "tokens_seen": 952107008 + }, + { + "epoch": 0.34, + "learning_rate": 0.000333144154370034, + "loss": 3.3466, + "theoretical_loss": 3.6659651057280023, + "tokens_seen": 953155584 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.4831368923187256, + "objective/train/docs_used": 546810, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0602049827575684, + "objective/train/original_loss": 3.0602047443389893, + "objective/train/theoretical_loss": 3.66581993815661, + "objective/train/tokens_used": 974008800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2421431988477707, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495444536209106, + "objective/train/weighted_lm_loss": 3.211397409439087, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9515434503555298, + "theoretical_loss": 3.66581993815661, + "tokens_seen": 953548800 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033295497540673476, + "loss": 3.3687, + "theoretical_loss": 3.665578162400558, + "tokens_seen": 954204160 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003327657964434355, + "loss": 3.3398, + "theoretical_loss": 3.66519176296262, + "tokens_seen": 955252736 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033257661748013626, + "loss": 3.3475, + "theoretical_loss": 3.664805906053928, + "tokens_seen": 956301312 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.4918384850025177, + "objective/train/docs_used": 548880, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3802547454833984, + "objective/train/original_loss": 3.3802547454833984, + "objective/train/theoretical_loss": 3.664613180624306, + "objective/train/tokens_used": 977285600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2429351955652237, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050418734550476, + "objective/train/weighted_lm_loss": 3.550429582595825, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9534175992012024, + "theoretical_loss": 3.664613180624306, + "tokens_seen": 956825600 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033238743851683695, + "loss": 3.4054, + "theoretical_loss": 3.6644205903191107, + "tokens_seen": 957349888 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033219825955353765, + "loss": 3.362, + "theoretical_loss": 3.664035814407661, + "tokens_seen": 958398464 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003320090805902384, + "loss": 3.3747, + "theoretical_loss": 3.663651576973915, + "tokens_seen": 959447040 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.4942702054977417, + "objective/train/docs_used": 550512, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2487757205963135, + "objective/train/original_loss": 3.2487761974334717, + "objective/train/theoretical_loss": 3.663411701427548, + "objective/train/tokens_used": 980562400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24505145847797394, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506727695465088, + "objective/train/weighted_lm_loss": 3.413585662841797, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9725967645645142, + "theoretical_loss": 3.663411701427548, + "tokens_seen": 960102400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003318199016269391, + "loss": 3.3048, + "theoretical_loss": 3.66326787667703, + "tokens_seen": 960495616 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003316307226636398, + "loss": 3.401, + "theoretical_loss": 3.6628847121809613, + "tokens_seen": 961544192 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033144154370034053, + "loss": 3.3882, + "theoretical_loss": 3.662502082154439, + "tokens_seen": 962592768 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.48432281613349915, + "objective/train/docs_used": 552388, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.152970314025879, + "objective/train/original_loss": 3.152970314025879, + "objective/train/theoretical_loss": 3.662215459582027, + "objective/train/tokens_used": 983839200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24182836711406708, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496615171432495, + "objective/train/weighted_lm_loss": 3.308091640472412, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9514780640602112, + "theoretical_loss": 3.662215459582027, + "tokens_seen": 963379200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003312523647370412, + "loss": 3.3735, + "theoretical_loss": 3.662119985270947, + "tokens_seen": 963641344 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033106318577374197, + "loss": 3.4034, + "theoretical_loss": 3.6617384202087004, + "tokens_seen": 964689920 + }, + { + "epoch": 0.34, + "learning_rate": 0.00033087400681044266, + "loss": 3.4614, + "theoretical_loss": 3.6613573856506236, + "tokens_seen": 965738496 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.49010223150253296, + "objective/train/docs_used": 553451, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1867620944976807, + "objective/train/original_loss": 3.1867618560791016, + "objective/train/theoretical_loss": 3.661024414559681, + "objective/train/tokens_used": 987116000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24266354739665985, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050243854522705, + "objective/train/weighted_lm_loss": 3.3466124534606934, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9682541489601135, + "theoretical_loss": 3.661024414559681, + "tokens_seen": 966656000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033068482784714336, + "loss": 3.3727, + "theoretical_loss": 3.6609768802843274, + "tokens_seen": 966787072 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033049564888384416, + "loss": 3.4051, + "theoretical_loss": 3.660596902802089, + "tokens_seen": 967835648 + }, + { + "epoch": 0.35, + "learning_rate": 0.00033030646992054485, + "loss": 3.5094, + "theoretical_loss": 3.66021745190083, + "tokens_seen": 968884224 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.48655059933662415, + "objective/train/docs_used": 554109, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4037506580352783, + "objective/train/original_loss": 3.4037506580352783, + "objective/train/theoretical_loss": 3.6598385262820923, + "objective/train/tokens_used": 990392800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24164670705795288, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049883484840393, + "objective/train/weighted_lm_loss": 3.573542356491089, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9580250978469849, + "theoretical_loss": 3.6598385262820923, + "tokens_seen": 969932800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003301172909572456, + "loss": 3.4282, + "theoretical_loss": 3.6598385262820923, + "tokens_seen": 969932800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003299281119939463, + "loss": 3.5318, + "theoretical_loss": 3.659460124652022, + "tokens_seen": 970981376 + }, + { + "epoch": 0.35, + "learning_rate": 0.000329738933030647, + "loss": 3.528, + "theoretical_loss": 3.6590822457213426, + "tokens_seen": 972029952 + }, + { + "epoch": 0.35, + "learning_rate": 0.00032954975406734773, + "loss": 3.6384, + "theoretical_loss": 3.658704888205337, + "tokens_seen": 973078528 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.48475736379623413, + "objective/train/docs_used": 555679, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5973386764526367, + "objective/train/original_loss": 3.5973386764526367, + "objective/train/theoretical_loss": 3.6586577551139974, + "objective/train/tokens_used": 993669600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2393985539674759, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496925115585327, + "objective/train/weighted_lm_loss": 3.776460647583008, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9569512009620667, + "theoretical_loss": 3.6586577551139974, + "tokens_seen": 973209600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003293605751040484, + "loss": 3.5721, + "theoretical_loss": 3.658328050823826, + "tokens_seen": 974127104 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003291713961407491, + "loss": 3.5957, + "theoretical_loss": 3.657951732301148, + "tokens_seen": 975175680 + }, + { + "epoch": 0.35, + "learning_rate": 0.00032898221717744987, + "loss": 3.5828, + "theoretical_loss": 3.657575931366135, + "tokens_seen": 976224256 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.4789718985557556, + "objective/train/docs_used": 557385, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.122087001800537, + "objective/train/original_loss": 3.122087001800537, + "objective/train/theoretical_loss": 3.657482061856916, + "objective/train/tokens_used": 996946400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23479175567626953, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490905046463013, + "objective/train/weighted_lm_loss": 3.275631904602051, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9560718536376953, + "theoretical_loss": 3.657482061856916, + "tokens_seen": 976486400 + }, + { + "epoch": 0.35, + "learning_rate": 0.00032879303821415056, + "loss": 3.6054, + "theoretical_loss": 3.6572006467520968, + "tokens_seen": 977272832 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003286038592508513, + "loss": 3.6388, + "theoretical_loss": 3.6568258771967965, + "tokens_seen": 978321408 + }, + { + "epoch": 0.35, + "learning_rate": 0.000328414680287552, + "loss": 3.5389, + "theoretical_loss": 3.6564516214424323, + "tokens_seen": 979369984 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.4748334586620331, + "objective/train/docs_used": 559012, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1960604190826416, + "objective/train/original_loss": 3.1960604190826416, + "objective/train/theoretical_loss": 3.656311407742891, + "objective/train/tokens_used": 1000223200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2362358719110489, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048684000968933, + "objective/train/weighted_lm_loss": 3.353394031524658, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9522448778152466, + "theoretical_loss": 3.656311407742891, + "tokens_seen": 979763200 + }, + { + "epoch": 0.35, + "learning_rate": 0.00032822550132425275, + "loss": 3.5887, + "theoretical_loss": 3.656077878235617, + "tokens_seen": 980418560 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003280363223609535, + "loss": 3.5621, + "theoretical_loss": 3.6557046463273557, + "tokens_seen": 981467136 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003278471433976542, + "loss": 3.5904, + "theoretical_loss": 3.655331924473029, + "tokens_seen": 982515712 + }, + { + "debugging/Self-BLEU-5": 0.4286046663919377, + "debugging/distinct-1-grams": 0.8147567798871364, + "debugging/distinct-2-grams": 0.9823269374342457, + "debugging/entropy-1-grams": 6.1671920556004824, + "debugging/entropy-2-grams": 6.947028138756313, + "debugging/length": 477.53333333333336, + "debugging/num_segments": 15, + "debugging/raw_token_scores_avg": 0.020611366257071495, + "debugging/raw_token_scores_std": 0.08496682345867157, + "epoch": 0.35, + "objective/train/advantage_avg": 0.4793747663497925, + "objective/train/docs_used": 560408, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.48490047454834, + "objective/train/original_loss": 3.48490047454834, + "objective/train/theoretical_loss": 3.6551457544283386, + "objective/train/tokens_used": 1003500000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2370256930589676, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491422414779663, + "objective/train/weighted_lm_loss": 3.6559360027313232, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9516208171844482, + "theoretical_loss": 3.6551457544283386, + "tokens_seen": 983040000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00032765796443435494, + "loss": 3.5123, + "theoretical_loss": 3.6549597114323706, + "tokens_seen": 983564288 + }, + { + "epoch": 0.35, + "learning_rate": 0.00032746878547105563, + "loss": 3.567, + "theoretical_loss": 3.6545880059694484, + "tokens_seen": 984612864 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003272796065077563, + "loss": 3.5675, + "theoretical_loss": 3.6542168068526433, + "tokens_seen": 985661440 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.47530868649482727, + "objective/train/docs_used": 562309, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.699045181274414, + "objective/train/original_loss": 3.699045181274414, + "objective/train/theoretical_loss": 3.6539850639880065, + "objective/train/tokens_used": 1006776800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23356567323207855, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487180948257446, + "objective/train/weighted_lm_loss": 3.881354808807373, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9573073387145996, + "theoretical_loss": 3.6539850639880065, + "tokens_seen": 986316800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003270904275444571, + "loss": 3.5629, + "theoretical_loss": 3.653846112854634, + "tokens_seen": 986710016 + }, + { + "epoch": 0.35, + "learning_rate": 0.00032690124858115777, + "loss": 3.4911, + "theoretical_loss": 3.6534759227523708, + "tokens_seen": 987758592 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003267120696178585, + "loss": 3.5047, + "theoretical_loss": 3.653106235327061, + "tokens_seen": 988807168 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.47373899817466736, + "objective/train/docs_used": 564240, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.646477222442627, + "objective/train/original_loss": 3.646477699279785, + "objective/train/theoretical_loss": 3.65282929890904, + "objective/train/tokens_used": 1010053600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23349297046661377, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485607385635376, + "objective/train/weighted_lm_loss": 3.822693347930908, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9552621245384216, + "theoretical_loss": 3.65282929890904, + "tokens_seen": 989593600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003265228906545592, + "loss": 3.5004, + "theoretical_loss": 3.6527370493641493, + "tokens_seen": 989855744 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003263337116912599, + "loss": 3.4973, + "theoretical_loss": 3.652368363653297, + "tokens_seen": 990904320 + }, + { + "epoch": 0.35, + "learning_rate": 0.00032614453272796065, + "loss": 3.5477, + "theoretical_loss": 3.6520001769883628, + "tokens_seen": 991952896 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.47648999094963074, + "objective/train/docs_used": 566160, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7423951625823975, + "objective/train/original_loss": 3.7423954010009766, + "objective/train/theoretical_loss": 3.651678422085146, + "objective/train/tokens_used": 1013330400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2367618978023529, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488523244857788, + "objective/train/weighted_lm_loss": 3.9235517978668213, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9544411301612854, + "theoretical_loss": 3.651678422085146, + "tokens_seen": 992870400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003259553537646614, + "loss": 3.4798, + "theoretical_loss": 3.651632488167385, + "tokens_seen": 993001472 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003257661748013621, + "loss": 3.5252, + "theoretical_loss": 3.651265295992563, + "tokens_seen": 994050048 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032557699583806284, + "loss": 3.4845, + "theoretical_loss": 3.650898599270236, + "tokens_seen": 995098624 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.46204280853271484, + "objective/train/docs_used": 567938, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3266007900238037, + "objective/train/original_loss": 3.3266003131866455, + "objective/train/theoretical_loss": 3.6505323968108674, + "objective/train/tokens_used": 1016607200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23566798865795135, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.047401785850525, + "objective/train/weighted_lm_loss": 3.4848270416259766, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9548930525779724, + "theoretical_loss": 3.6505323968108674, + "tokens_seen": 996147200 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032538781687476353, + "loss": 3.4402, + "theoretical_loss": 3.6505323968108674, + "tokens_seen": 996147200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003251986379114643, + "loss": 3.5624, + "theoretical_loss": 3.6501666874290244, + "tokens_seen": 997195776 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032500945894816497, + "loss": 3.4626, + "theoretical_loss": 3.6498014699433603, + "tokens_seen": 998244352 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032482027998486567, + "loss": 3.4443, + "theoretical_loss": 3.6494367431765955, + "tokens_seen": 999292928 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.49096834659576416, + "objective/train/docs_used": 570122, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0309109687805176, + "objective/train/original_loss": 3.0309109687805176, + "objective/train/theoretical_loss": 3.6493911867759463, + "objective/train/tokens_used": 1019884000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24276913702487946, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503308773040771, + "objective/train/weighted_lm_loss": 3.183624029159546, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9735167622566223, + "theoretical_loss": 3.6493911867759463, + "tokens_seen": 999424000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003246311010215664, + "loss": 3.3911, + "theoretical_loss": 3.6490725059554996, + "tokens_seen": 1000341504 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003244419220582671, + "loss": 3.5018, + "theoretical_loss": 3.648708757110873, + "tokens_seen": 1001390080 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032425274309496785, + "loss": 3.4298, + "theoretical_loss": 3.6483454954775305, + "tokens_seen": 1002438656 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.48988527059555054, + "objective/train/docs_used": 571707, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.399118185043335, + "objective/train/original_loss": 3.399117946624756, + "objective/train/theoretical_loss": 3.648254756059793, + "objective/train/tokens_used": 1023160800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24150972068309784, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502160787582397, + "objective/train/weighted_lm_loss": 3.570770025253296, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9835735559463501, + "theoretical_loss": 3.648254756059793, + "tokens_seen": 1002700800 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032406356413166855, + "loss": 3.4353, + "theoretical_loss": 3.64798271989428, + "tokens_seen": 1003487232 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032387438516836924, + "loss": 3.3814, + "theoretical_loss": 3.647620429203908, + "tokens_seen": 1004535808 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032368520620507004, + "loss": 3.5187, + "theoretical_loss": 3.6472586222531587, + "tokens_seen": 1005584384 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.48494696617126465, + "objective/train/docs_used": 573416, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2771799564361572, + "objective/train/original_loss": 3.2771804332733154, + "objective/train/theoretical_loss": 3.6471230691260477, + "objective/train/tokens_used": 1026437600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23839180171489716, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497063398361206, + "objective/train/weighted_lm_loss": 3.440140962600708, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9522702693939209, + "theoretical_loss": 3.6471230691260477, + "tokens_seen": 1005977600 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032349602724177074, + "loss": 3.5214, + "theoretical_loss": 3.6468972978927208, + "tokens_seen": 1006632960 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032330684827847143, + "loss": 3.4675, + "theoretical_loss": 3.646536454977205, + "tokens_seen": 1007681536 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003231176693151722, + "loss": 3.4601, + "theoretical_loss": 3.6461760923651294, + "tokens_seen": 1008730112 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.4919039309024811, + "objective/train/docs_used": 575328, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1079111099243164, + "objective/train/original_loss": 3.1079111099243164, + "objective/train/theoretical_loss": 3.645996090817232, + "objective/train/tokens_used": 1029714400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2433389574289322, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504273176193237, + "objective/train/weighted_lm_loss": 3.2646045684814453, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.959859311580658, + "theoretical_loss": 3.645996090817232, + "tokens_seen": 1009254400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032292849035187287, + "loss": 3.4448, + "theoretical_loss": 3.645816208918901, + "tokens_seen": 1009778688 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003227393113885736, + "loss": 3.3991, + "theoretical_loss": 3.6454568035048003, + "tokens_seen": 1010827264 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003225501324252743, + "loss": 3.4667, + "theoretical_loss": 3.645097874992961, + "tokens_seen": 1011875840 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.46897462010383606, + "objective/train/docs_used": 577263, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.042729139328003, + "objective/train/original_loss": 3.042728900909424, + "objective/train/theoretical_loss": 3.644873786349497, + "objective/train/tokens_used": 1032991200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2294284999370575, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0480632781982422, + "objective/train/weighted_lm_loss": 3.192321300506592, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9518375396728516, + "theoretical_loss": 3.644873786349497, + "tokens_seen": 1012531200 + }, + { + "epoch": 0.36, + "learning_rate": 0.000322360953461975, + "loss": 3.4738, + "theoretical_loss": 3.6447394222573557, + "tokens_seen": 1012924416 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032217177449867575, + "loss": 3.4734, + "theoretical_loss": 3.644381444175778, + "tokens_seen": 1013972992 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032198259553537645, + "loss": 3.475, + "theoretical_loss": 3.6440239396298244, + "tokens_seen": 1015021568 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.47905510663986206, + "objective/train/docs_used": 579051, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3449113368988037, + "objective/train/original_loss": 3.344911575317383, + "objective/train/theoretical_loss": 3.643756121307459, + "objective/train/tokens_used": 1036268000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23849982023239136, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491175651550293, + "objective/train/weighted_lm_loss": 3.5071990489959717, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9516644477844238, + "theoretical_loss": 3.643756121307459, + "tokens_seen": 1015808000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003217934165720772, + "loss": 3.4243, + "theoretical_loss": 3.643666907504879, + "tokens_seen": 1016070144 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003216042376087779, + "loss": 3.4923, + "theoretical_loss": 3.6433103466900962, + "tokens_seen": 1017118720 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032141505864547864, + "loss": 3.4227, + "theoretical_loss": 3.6429542560783856, + "tokens_seen": 1018167296 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.4651617705821991, + "objective/train/docs_used": 580918, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.703390121459961, + "objective/train/original_loss": 3.70339035987854, + "objective/train/theoretical_loss": 3.642643061639121, + "objective/train/tokens_used": 1039544800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23710434138774872, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.047721028327942, + "objective/train/weighted_lm_loss": 3.879617691040039, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9520632028579712, + "theoretical_loss": 3.642643061639121, + "tokens_seen": 1019084800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003212258796821794, + "loss": 3.4734, + "theoretical_loss": 3.6425986345663914, + "tokens_seen": 1019215872 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003210367007188801, + "loss": 3.4991, + "theoretical_loss": 3.6422434810544813, + "tokens_seen": 1020264448 + }, + { + "epoch": 0.36, + "learning_rate": 0.00032084752175558077, + "loss": 3.4978, + "theoretical_loss": 3.641888794446725, + "tokens_seen": 1021313024 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.48555630445480347, + "objective/train/docs_used": 582721, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.36793851852417, + "objective/train/original_loss": 3.3679380416870117, + "objective/train/theoretical_loss": 3.6415345736508824, + "objective/train/tokens_used": 1042821600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24094036221504211, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497803688049316, + "objective/train/weighted_lm_loss": 3.535130023956299, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9528319835662842, + "theoretical_loss": 3.6415345736508824, + "tokens_seen": 1022361600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003206583427922815, + "loss": 3.5065, + "theoretical_loss": 3.6415345736508824, + "tokens_seen": 1022361600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003204691638289822, + "loss": 3.4752, + "theoretical_loss": 3.6411808175783844, + "tokens_seen": 1023410176 + }, + { + "epoch": 0.37, + "learning_rate": 0.00032027998486568296, + "loss": 3.3807, + "theoretical_loss": 3.640827525144318, + "tokens_seen": 1024458752 + }, + { + "epoch": 0.37, + "learning_rate": 0.00032009080590238365, + "loss": 3.405, + "theoretical_loss": 3.64047469526741, + "tokens_seen": 1025507328 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.47355058789253235, + "objective/train/docs_used": 584025, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.485971212387085, + "objective/train/original_loss": 3.485970973968506, + "objective/train/theoretical_loss": 3.6404306240026356, + "objective/train/tokens_used": 1046098400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2372806966304779, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485612154006958, + "objective/train/weighted_lm_loss": 3.65089750289917, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9561977386474609, + "theoretical_loss": 3.6404306240026356, + "tokens_seen": 1025638400 + }, + { + "epoch": 0.37, + "learning_rate": 0.00031990162693908435, + "loss": 3.4315, + "theoretical_loss": 3.640122326870012, + "tokens_seen": 1026555904 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003197124479757851, + "loss": 3.4454, + "theoretical_loss": 3.639770418878081, + "tokens_seen": 1027604480 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003195232690124858, + "loss": 3.4686, + "theoretical_loss": 3.6394189702211706, + "tokens_seen": 1028653056 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.4855080544948578, + "objective/train/docs_used": 586068, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.275355339050293, + "objective/train/original_loss": 3.275355100631714, + "objective/train/theoretical_loss": 3.6393311797029373, + "objective/train/tokens_used": 1049375200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2405899465084076, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497738122940063, + "objective/train/weighted_lm_loss": 3.437480926513672, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9573184847831726, + "theoretical_loss": 3.6393311797029373, + "tokens_seen": 1028915200 + }, + { + "epoch": 0.37, + "learning_rate": 0.00031933409004918653, + "loss": 3.4559, + "theoretical_loss": 3.639067979832408, + "tokens_seen": 1029701632 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003191449110858873, + "loss": 3.3891, + "theoretical_loss": 3.6387174466484824, + "tokens_seen": 1030750208 + }, + { + "epoch": 0.37, + "learning_rate": 0.000318955732122588, + "loss": 3.4252, + "theoretical_loss": 3.6383673696096297, + "tokens_seen": 1031798784 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.4844791293144226, + "objective/train/docs_used": 588066, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.536590576171875, + "objective/train/original_loss": 3.536591053009033, + "objective/train/theoretical_loss": 3.63823620810427, + "objective/train/tokens_used": 1052652000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.239736869931221, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496665239334106, + "objective/train/weighted_lm_loss": 3.7123160362243652, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9534673094749451, + "theoretical_loss": 3.63823620810427, + "tokens_seen": 1032192000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003187665531592887, + "loss": 3.4282, + "theoretical_loss": 3.638017747659614, + "tokens_seen": 1032847360 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003185773741959894, + "loss": 3.3684, + "theoretical_loss": 3.637668579745716, + "tokens_seen": 1033895936 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003183881952326901, + "loss": 3.3848, + "theoretical_loss": 3.637319864818716, + "tokens_seen": 1034944512 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.48198384046554565, + "objective/train/docs_used": 589983, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.28519344329834, + "objective/train/original_loss": 3.28519344329834, + "objective/train/theoretical_loss": 3.637145676898374, + "objective/train/tokens_used": 1055928800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2357451617717743, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493967533111572, + "objective/train/weighted_lm_loss": 3.4479753971099854, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9625754952430725, + "theoretical_loss": 3.637145676898374, + "tokens_seen": 1035468800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00031819901626939086, + "loss": 3.3893, + "theoretical_loss": 3.6369716018328777, + "tokens_seen": 1035993088 + }, + { + "epoch": 0.37, + "learning_rate": 0.00031800983730609155, + "loss": 3.3671, + "theoretical_loss": 3.6366237897459355, + "tokens_seen": 1037041664 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003178206583427923, + "loss": 3.3868, + "theoretical_loss": 3.6362764275190766, + "tokens_seen": 1038090240 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.48860275745391846, + "objective/train/docs_used": 592008, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0765490531921387, + "objective/train/original_loss": 3.0765490531921387, + "objective/train/theoretical_loss": 3.636059554111668, + "objective/train/tokens_used": 1059205600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2414240688085556, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500874519348145, + "objective/train/weighted_lm_loss": 3.2311158180236816, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9935080409049988, + "theoretical_loss": 3.636059554111668, + "tokens_seen": 1038745600 + }, + { + "epoch": 0.37, + "learning_rate": 0.000317631479379493, + "loss": 3.3592, + "theoretical_loss": 3.6359295141169303, + "tokens_seen": 1039138816 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003174423004161937, + "loss": 3.385, + "theoretical_loss": 3.6355830485075473, + "tokens_seen": 1040187392 + }, + { + "epoch": 0.37, + "learning_rate": 0.00031725312145289443, + "loss": 3.3209, + "theoretical_loss": 3.635237029662391, + "tokens_seen": 1041235968 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.47969964146614075, + "objective/train/docs_used": 593661, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0731759071350098, + "objective/train/original_loss": 3.0731759071350098, + "objective/train/theoretical_loss": 3.6349778081007327, + "objective/train/tokens_used": 1062482400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2339300960302353, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491589307785034, + "objective/train/weighted_lm_loss": 3.2258410453796387, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9526910185813904, + "theoretical_loss": 3.6349778081007327, + "tokens_seen": 1042022400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003170639424895951, + "loss": 3.337, + "theoretical_loss": 3.6348914565563186, + "tokens_seen": 1042284544 + }, + { + "epoch": 0.37, + "learning_rate": 0.00031687476352629593, + "loss": 3.3503, + "theoretical_loss": 3.6345463281675676, + "tokens_seen": 1043333120 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003166855845629966, + "loss": 3.339, + "theoretical_loss": 3.6342016434777427, + "tokens_seen": 1044381696 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.48656022548675537, + "objective/train/docs_used": 595739, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4706473350524902, + "objective/train/original_loss": 3.470647096633911, + "objective/train/theoretical_loss": 3.6339004075478796, + "objective/train/tokens_used": 1065759200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2418631613254547, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498853921890259, + "objective/train/weighted_lm_loss": 3.6436004638671875, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9542835354804993, + "theoretical_loss": 3.6339004075478796, + "tokens_seen": 1045299200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003164964055996973, + "loss": 3.3997, + "theoretical_loss": 3.6338574014717997, + "tokens_seen": 1045430272 + }, + { + "epoch": 0.37, + "learning_rate": 0.00031630722663639806, + "loss": 3.3249, + "theoretical_loss": 3.6335136011380307, + "tokens_seen": 1046478848 + }, + { + "epoch": 0.37, + "learning_rate": 0.00031611804767309876, + "loss": 3.3354, + "theoretical_loss": 3.6331702414680525, + "tokens_seen": 1047527424 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.477405309677124, + "objective/train/docs_used": 597756, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.253281831741333, + "objective/train/original_loss": 3.253281593322754, + "objective/train/theoretical_loss": 3.632827321456789, + "objective/train/tokens_used": 1069036000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23487040400505066, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489342212677002, + "objective/train/weighted_lm_loss": 3.4119961261749268, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9524772763252258, + "theoretical_loss": 3.632827321456789, + "tokens_seen": 1048576000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003159288687097995, + "loss": 3.4091, + "theoretical_loss": 3.632827321456789, + "tokens_seen": 1048576000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003157396897465002, + "loss": 3.3915, + "theoretical_loss": 3.6324848401024594, + "tokens_seen": 1049624576 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003155505107832009, + "loss": 3.3367, + "theoretical_loss": 3.632142796406564, + "tokens_seen": 1050673152 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031536133181990164, + "loss": 3.4277, + "theoretical_loss": 3.631801189373867, + "tokens_seen": 1051721728 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.4907068908214569, + "objective/train/docs_used": 599577, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9004616737365723, + "objective/train/original_loss": 2.9004616737365723, + "objective/train/theoretical_loss": 3.631758519148221, + "objective/train/tokens_used": 1072312800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.243531733751297, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503087043762207, + "objective/train/weighted_lm_loss": 3.046717882156372, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9606583118438721, + "theoretical_loss": 3.631758519148221, + "tokens_seen": 1051852800 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031517215285660233, + "loss": 3.4022, + "theoretical_loss": 3.631460018012389, + "tokens_seen": 1052770304 + }, + { + "epoch": 0.38, + "learning_rate": 0.000314982973893303, + "loss": 3.4569, + "theoretical_loss": 3.631119281333386, + "tokens_seen": 1053818880 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003147937949300038, + "loss": 3.3993, + "theoretical_loss": 3.6307789783513402, + "tokens_seen": 1054867456 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.4608767032623291, + "objective/train/docs_used": 601442, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.057370901107788, + "objective/train/original_loss": 3.057370662689209, + "objective/train/theoretical_loss": 3.630693970255794, + "objective/train/tokens_used": 1075589600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23330651223659515, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04727303981781, + "objective/train/weighted_lm_loss": 3.204721212387085, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9519646167755127, + "theoretical_loss": 3.630693970255794, + "tokens_seen": 1055129600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003146046159667045, + "loss": 3.3655, + "theoretical_loss": 3.6304391080839453, + "tokens_seen": 1055916032 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031441543700340527, + "loss": 3.3979, + "theoretical_loss": 3.630099669552091, + "tokens_seen": 1056964608 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031422625804010596, + "loss": 3.3503, + "theoretical_loss": 3.6297606617798532, + "tokens_seen": 1058013184 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.4877772033214569, + "objective/train/docs_used": 602965, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0313591957092285, + "objective/train/original_loss": 3.0313591957092285, + "objective/train/theoretical_loss": 3.629633644721836, + "objective/train/tokens_used": 1078866400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24103260040283203, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500030517578125, + "objective/train/weighted_lm_loss": 3.182788133621216, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.95267653465271, + "theoretical_loss": 3.629633644721836, + "tokens_seen": 1058406400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031403707907680666, + "loss": 3.3468, + "theoretical_loss": 3.629422083794477, + "tokens_seen": 1059061760 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003138479001135074, + "loss": 3.3813, + "theoretical_loss": 3.6290839346263644, + "tokens_seen": 1060110336 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003136587211502081, + "loss": 3.3796, + "theoretical_loss": 3.6287462133090616, + "tokens_seen": 1061158912 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.4929821789264679, + "objective/train/docs_used": 604364, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3100390434265137, + "objective/train/original_loss": 3.3100390434265137, + "objective/train/theoretical_loss": 3.628577512793303, + "objective/train/tokens_used": 1082143200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24545590579509735, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505460500717163, + "objective/train/weighted_lm_loss": 3.4770169258117676, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.951737642288208, + "theoretical_loss": 3.628577512793303, + "tokens_seen": 1061683200 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031346954218690884, + "loss": 3.3702, + "theoretical_loss": 3.6284089188792445, + "tokens_seen": 1062207488 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031328036322360954, + "loss": 3.2997, + "theoretical_loss": 3.6280720503767077, + "tokens_seen": 1063256064 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031309118426031023, + "loss": 3.4449, + "theoretical_loss": 3.627735606844347, + "tokens_seen": 1064304640 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.46973249316215515, + "objective/train/docs_used": 606079, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8989434242248535, + "objective/train/original_loss": 2.8989436626434326, + "objective/train/theoretical_loss": 3.62752554501776, + "objective/train/tokens_used": 1085420000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23049893975257874, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048144817352295, + "objective/train/weighted_lm_loss": 3.0359854698181152, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9730321764945984, + "theoretical_loss": 3.62752554501776, + "tokens_seen": 1064960000 + }, + { + "epoch": 0.38, + "learning_rate": 0.000312902005297011, + "loss": 3.3205, + "theoretical_loss": 3.627399587328153, + "tokens_seen": 1065353216 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031271282633371167, + "loss": 3.3029, + "theoretical_loss": 3.6270639908771907, + "tokens_seen": 1066401792 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031252364737041237, + "loss": 3.3871, + "theoretical_loss": 3.6267288165435922, + "tokens_seen": 1067450368 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.48607712984085083, + "objective/train/docs_used": 607761, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5710361003875732, + "objective/train/original_loss": 3.5710363388061523, + "objective/train/theoretical_loss": 3.6264777122394327, + "objective/train/tokens_used": 1088696800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2421998828649521, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498387813568115, + "objective/train/weighted_lm_loss": 3.748819589614868, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9520419836044312, + "theoretical_loss": 3.6264777122394327, + "tokens_seen": 1068236800 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031233446840711317, + "loss": 3.4255, + "theoretical_loss": 3.626394063382541, + "tokens_seen": 1068498944 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031214528944381386, + "loss": 3.3785, + "theoretical_loss": 3.62605973045226, + "tokens_seen": 1069547520 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003119561104805146, + "loss": 3.3981, + "theoretical_loss": 3.6257258168139987, + "tokens_seen": 1070596096 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.483851820230484, + "objective/train/docs_used": 609849, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.248103618621826, + "objective/train/original_loss": 3.248103618621826, + "objective/train/theoretical_loss": 3.6254339855953184, + "objective/train/tokens_used": 1091973600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24208344519138336, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496156215667725, + "objective/train/weighted_lm_loss": 3.4082441329956055, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9513829350471497, + "theoretical_loss": 3.6254339855953184, + "tokens_seen": 1071513600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003117669315172153, + "loss": 3.3544, + "theoretical_loss": 3.625392321532021, + "tokens_seen": 1071644672 + }, + { + "epoch": 0.38, + "learning_rate": 0.000311577752553916, + "loss": 3.375, + "theoretical_loss": 3.6250592436735904, + "tokens_seen": 1072693248 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031138857359061674, + "loss": 3.3512, + "theoretical_loss": 3.624726582308961, + "tokens_seen": 1073741824 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.4885290265083313, + "objective/train/docs_used": 611858, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.38407826423645, + "objective/train/original_loss": 3.384077548980713, + "objective/train/theoretical_loss": 3.624394336511362, + "objective/train/tokens_used": 1095250400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24165187776088715, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500812530517578, + "objective/train/weighted_lm_loss": 3.5534675121307373, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9516956806182861, + "theoretical_loss": 3.624394336511362, + "tokens_seen": 1074790400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00031119939462731744, + "loss": 3.3759, + "theoretical_loss": 3.624394336511362, + "tokens_seen": 1074790400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003110102156640182, + "loss": 3.3077, + "theoretical_loss": 3.6240625053569873, + "tokens_seen": 1075838976 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003108210367007189, + "loss": 3.3128, + "theoretical_loss": 3.6237310879249813, + "tokens_seen": 1076887552 + }, + { + "epoch": 0.39, + "learning_rate": 0.00031063185773741957, + "loss": 3.3346, + "theoretical_loss": 3.6234000832974282, + "tokens_seen": 1077936128 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.4757619798183441, + "objective/train/docs_used": 613931, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.840676784515381, + "objective/train/original_loss": 2.84067702293396, + "objective/train/theoretical_loss": 3.6233587366986946, + "objective/train/tokens_used": 1098527200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23324042558670044, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487614870071411, + "objective/train/weighted_lm_loss": 2.9804329872131348, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9518725275993347, + "theoretical_loss": 3.6233587366986946, + "tokens_seen": 1078067200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003104426787741203, + "loss": 3.2935, + "theoretical_loss": 3.623069490559339, + "tokens_seen": 1078984704 + }, + { + "epoch": 0.39, + "learning_rate": 0.000310253499810821, + "loss": 3.3639, + "theoretical_loss": 3.6227393087986393, + "tokens_seen": 1080033280 + }, + { + "epoch": 0.39, + "learning_rate": 0.00031006432084752176, + "loss": 3.3186, + "theoretical_loss": 3.622409537106158, + "tokens_seen": 1081081856 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.4866965413093567, + "objective/train/docs_used": 615851, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2716050148010254, + "objective/train/original_loss": 3.2716054916381836, + "objective/train/theoretical_loss": 3.622327158149928, + "objective/train/tokens_used": 1101804000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24117985367774963, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498956441879272, + "objective/train/weighted_lm_loss": 3.434610605239868, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.951644778251648, + "theoretical_loss": 3.622327158149928, + "tokens_seen": 1081344000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003098751418842225, + "loss": 3.3499, + "theoretical_loss": 3.622080174575613, + "tokens_seen": 1082130432 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003096859629209232, + "loss": 3.3303, + "theoretical_loss": 3.6217512203036026, + "tokens_seen": 1083179008 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030949678395762395, + "loss": 3.4256, + "theoretical_loss": 3.621422673389592, + "tokens_seen": 1084227584 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.48583704233169556, + "objective/train/docs_used": 617294, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.147465705871582, + "objective/train/original_loss": 3.147465705871582, + "objective/train/theoretical_loss": 3.621299573135513, + "objective/train/tokens_used": 1105080800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2386694699525833, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497969388961792, + "objective/train/weighted_lm_loss": 3.3032023906707764, + "objective/train/weights_max": 1.051215410232544, + "objective/train/weights_min": 0.9727230668067932, + "theoretical_loss": 3.621299573135513, + "tokens_seen": 1084620800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030930760499432464, + "loss": 3.3714, + "theoretical_loss": 3.6210945329358992, + "tokens_seen": 1085276160 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030911842603102534, + "loss": 3.3833, + "theoretical_loss": 3.6207667980476868, + "tokens_seen": 1086324736 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003089292470677261, + "loss": 3.3904, + "theoretical_loss": 3.620439467832949, + "tokens_seen": 1087373312 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.4890348017215729, + "objective/train/docs_used": 619111, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1311914920806885, + "objective/train/original_loss": 3.1311917304992676, + "objective/train/theoretical_loss": 3.620275954200152, + "objective/train/tokens_used": 1108357600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24180911481380463, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501326322555542, + "objective/train/weighted_lm_loss": 3.2881572246551514, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.955804169178009, + "theoretical_loss": 3.620275954200152, + "tokens_seen": 1087897600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003087400681044268, + "loss": 3.4003, + "theoretical_loss": 3.6201125414024986, + "tokens_seen": 1088421888 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003085508891411275, + "loss": 3.3861, + "theoretical_loss": 3.619786017869957, + "tokens_seen": 1089470464 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003083617101778282, + "loss": 3.3314, + "theoretical_loss": 3.619459896351742, + "tokens_seen": 1090519040 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.4774239659309387, + "objective/train/docs_used": 620966, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1619443893432617, + "objective/train/original_loss": 3.16194486618042, + "objective/train/theoretical_loss": 3.6192562741592726, + "objective/train/tokens_used": 1111634400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23663067817687988, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489450693130493, + "objective/train/weighted_lm_loss": 3.3209400177001953, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9530478715896606, + "theoretical_loss": 3.6192562741592726, + "tokens_seen": 1091174400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003081725312145289, + "loss": 3.3275, + "theoretical_loss": 3.6191341759670568, + "tokens_seen": 1091567616 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030798335225122966, + "loss": 3.4539, + "theoretical_loss": 3.618808855837877, + "tokens_seen": 1092616192 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003077941732879304, + "loss": 3.303, + "theoretical_loss": 3.6184839350889417, + "tokens_seen": 1093664768 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.48794177174568176, + "objective/train/docs_used": 623021, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.890498399734497, + "objective/train/original_loss": 2.890498399734497, + "objective/train/theoretical_loss": 3.6182405060955523, + "objective/train/tokens_used": 1114911200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.239480122923851, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500115156173706, + "objective/train/weighted_lm_loss": 3.0350229740142822, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9790964126586914, + "theoretical_loss": 3.6182405060955523, + "tokens_seen": 1094451200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003076049943246311, + "loss": 3.3359, + "theoretical_loss": 3.6181594128477395, + "tokens_seen": 1094713344 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030741581536133185, + "loss": 3.3334, + "theoretical_loss": 3.6178352882444997, + "tokens_seen": 1095761920 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030722663639803254, + "loss": 3.3046, + "theoretical_loss": 3.6175115604121793, + "tokens_seen": 1096810496 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.4875214993953705, + "objective/train/docs_used": 624881, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3388214111328125, + "objective/train/original_loss": 3.3388214111328125, + "objective/train/theoretical_loss": 3.617228623355502, + "objective/train/tokens_used": 1118188000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24309813976287842, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04998779296875, + "objective/train/weighted_lm_loss": 3.505209445953369, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9611717462539673, + "theoretical_loss": 3.617228623355502, + "tokens_seen": 1097728000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003070374574347333, + "loss": 3.3572, + "theoretical_loss": 3.6171882284864525, + "tokens_seen": 1097859072 + }, + { + "epoch": 0.39, + "learning_rate": 0.000306848278471434, + "loss": 3.32, + "theoretical_loss": 3.6168652916056994, + "tokens_seen": 1098907648 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003066590995081347, + "loss": 3.3374, + "theoretical_loss": 3.6165427489109963, + "tokens_seen": 1099956224 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.49022623896598816, + "objective/train/docs_used": 626669, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2733004093170166, + "objective/train/original_loss": 3.2733001708984375, + "objective/train/theoretical_loss": 3.616220599546101, + "objective/train/tokens_used": 1121464800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2436634600162506, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502612590789795, + "objective/train/weighted_lm_loss": 3.4382221698760986, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9524181485176086, + "theoretical_loss": 3.616220599546101, + "tokens_seen": 1101004800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003064699205448354, + "loss": 3.339, + "theoretical_loss": 3.616220599546101, + "tokens_seen": 1101004800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003062807415815361, + "loss": 3.2574, + "theoretical_loss": 3.615898842657448, + "tokens_seen": 1102053376 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030609156261823686, + "loss": 3.3171, + "theoretical_loss": 3.6155774773941305, + "tokens_seen": 1103101952 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030590238365493756, + "loss": 3.3261, + "theoretical_loss": 3.615256502907896, + "tokens_seen": 1104150528 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.48506712913513184, + "objective/train/docs_used": 628420, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.485072612762451, + "objective/train/original_loss": 3.485072374343872, + "objective/train/theoretical_loss": 3.6152164085314853, + "objective/train/tokens_used": 1124741600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23975829780101776, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497254133224487, + "objective/train/weighted_lm_loss": 3.658010244369507, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9524231553077698, + "theoretical_loss": 3.6152164085314853, + "tokens_seen": 1104281600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00030571320469163825, + "loss": 3.3752, + "theoretical_loss": 3.6149359183531296, + "tokens_seen": 1105199104 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030552402572833905, + "loss": 3.3202, + "theoretical_loss": 3.614615722886849, + "tokens_seen": 1106247680 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030533484676503975, + "loss": 3.3336, + "theoretical_loss": 3.614295915668691, + "tokens_seen": 1107296256 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.48640114068984985, + "objective/train/docs_used": 629445, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4100451469421387, + "objective/train/original_loss": 3.4100446701049805, + "objective/train/theoretical_loss": 3.6142160244296884, + "objective/train/tokens_used": 1128018400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23953872919082642, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498576164245605, + "objective/train/weighted_lm_loss": 3.579258441925049, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9583953022956848, + "theoretical_loss": 3.6142160244296884, + "tokens_seen": 1107558400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003051456678017405, + "loss": 3.3687, + "theoretical_loss": 3.613976495860898, + "tokens_seen": 1108344832 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003049564888384412, + "loss": 3.3225, + "theoretical_loss": 3.613657462628315, + "tokens_seen": 1109393408 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003047673098751419, + "loss": 3.3378, + "theoretical_loss": 3.613338815138371, + "tokens_seen": 1110441984 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.47437578439712524, + "objective/train/docs_used": 631502, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.02118182182312, + "objective/train/original_loss": 3.021181583404541, + "objective/train/theoretical_loss": 3.6132194216094313, + "objective/train/tokens_used": 1131295200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23702089488506317, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486419200897217, + "objective/train/weighted_lm_loss": 3.1670000553131104, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9514435529708862, + "theoretical_loss": 3.6132194216094313, + "tokens_seen": 1110835200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030457813091184263, + "loss": 3.2814, + "theoretical_loss": 3.613020552561074, + "tokens_seen": 1111490560 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003043889519485433, + "loss": 3.3688, + "theoretical_loss": 3.6127026740689967, + "tokens_seen": 1112539136 + }, + { + "epoch": 0.4, + "learning_rate": 0.000304199772985244, + "loss": 3.316, + "theoretical_loss": 3.612385178837271, + "tokens_seen": 1113587712 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.4620572030544281, + "objective/train/docs_used": 633386, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.020350694656372, + "objective/train/original_loss": 3.020350933074951, + "objective/train/theoretical_loss": 3.6122265746869653, + "objective/train/tokens_used": 1134572000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24398073554039001, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0474449396133423, + "objective/train/weighted_lm_loss": 3.1671273708343506, + "objective/train/weights_max": 1.0512152910232544, + "objective/train/weights_min": 0.9529370665550232, + "theoretical_loss": 3.6122265746869653, + "tokens_seen": 1114112000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030401059402194476, + "loss": 3.3191, + "theoretical_loss": 3.6120680660435736, + "tokens_seen": 1114636288 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030382141505864546, + "loss": 3.381, + "theoretical_loss": 3.6117513348681163, + "tokens_seen": 1115684864 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003036322360953462, + "loss": 3.3724, + "theoretical_loss": 3.611434984493637, + "tokens_seen": 1116733440 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.4832095205783844, + "objective/train/docs_used": 634684, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4889237880706787, + "objective/train/original_loss": 3.488924026489258, + "objective/train/theoretical_loss": 3.6112374585229583, + "objective/train/tokens_used": 1137848800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24187950789928436, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495502948760986, + "objective/train/weighted_lm_loss": 3.661220073699951, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9514582753181458, + "theoretical_loss": 3.6112374585229583, + "tokens_seen": 1117388800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003034430571320469, + "loss": 3.3915, + "theoretical_loss": 3.6111190141053893, + "tokens_seen": 1117782016 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030325387816874765, + "loss": 3.4511, + "theoretical_loss": 3.6108034228911334, + "tokens_seen": 1118830592 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003030646992054484, + "loss": 3.3911, + "theoretical_loss": 3.6104882100411215, + "tokens_seen": 1119879168 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.48330286145210266, + "objective/train/docs_used": 636549, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.597794771194458, + "objective/train/original_loss": 3.597794532775879, + "objective/train/theoretical_loss": 3.6102520482194387, + "objective/train/tokens_used": 1141125600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23953989148139954, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495479106903076, + "objective/train/weighted_lm_loss": 3.7755210399627686, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9581436514854431, + "theoretical_loss": 3.6102520482194387, + "tokens_seen": 1120665600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003028755202421491, + "loss": 3.468, + "theoretical_loss": 3.6101733747480957, + "tokens_seen": 1120927744 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030268634127884983, + "loss": 3.3887, + "theoretical_loss": 3.609858916207269, + "tokens_seen": 1121976320 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030249716231555053, + "loss": 3.3466, + "theoretical_loss": 3.609544833616324, + "tokens_seen": 1123024896 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.4877423942089081, + "objective/train/docs_used": 638453, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0719454288482666, + "objective/train/original_loss": 3.0719454288482666, + "objective/train/theoretical_loss": 3.6092703191167743, + "objective/train/tokens_used": 1144402400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24203670024871826, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500047206878662, + "objective/train/weighted_lm_loss": 3.2259280681610107, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9517780542373657, + "theoretical_loss": 3.6092703191167743, + "tokens_seen": 1123942400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003023079833522512, + "loss": 3.4285, + "theoretical_loss": 3.6092311261753958, + "tokens_seen": 1124073472 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030211880438895197, + "loss": 3.3911, + "theoretical_loss": 3.608917793087066, + "tokens_seen": 1125122048 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030192962542565266, + "loss": 3.3768, + "theoretical_loss": 3.608604833556355, + "tokens_seen": 1126170624 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.4918157458305359, + "objective/train/docs_used": 640230, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3360931873321533, + "objective/train/original_loss": 3.3360931873321533, + "objective/train/theoretical_loss": 3.6082922467907066, + "objective/train/tokens_used": 1147679200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24366138875484467, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504201650619507, + "objective/train/weighted_lm_loss": 3.5035665035247803, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9529560208320618, + "theoretical_loss": 3.6082922467907066, + "tokens_seen": 1127219200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030174044646235336, + "loss": 3.4143, + "theoretical_loss": 3.6082922467907066, + "tokens_seen": 1127219200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003015512674990541, + "loss": 3.3773, + "theoretical_loss": 3.6079800319999817, + "tokens_seen": 1128267776 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003013620885357548, + "loss": 3.3424, + "theoretical_loss": 3.60766818839645, + "tokens_seen": 1129316352 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030117290957245554, + "loss": 3.3289, + "theoretical_loss": 3.6073567151947774, + "tokens_seen": 1130364928 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.49019014835357666, + "objective/train/docs_used": 641368, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1225857734680176, + "objective/train/original_loss": 3.1225852966308594, + "objective/train/theoretical_loss": 3.6073178070494287, + "objective/train/tokens_used": 1150956000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24214524030685425, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502499341964722, + "objective/train/weighted_lm_loss": 3.279658079147339, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9604944586753845, + "theoretical_loss": 3.6073178070494287, + "tokens_seen": 1130496000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003009837306091563, + "loss": 3.3178, + "theoretical_loss": 3.607045611612018, + "tokens_seen": 1131413504 + }, + { + "epoch": 0.4, + "learning_rate": 0.000300794551645857, + "loss": 3.3106, + "theoretical_loss": 3.6067348768676064, + "tokens_seen": 1132462080 + }, + { + "epoch": 0.4, + "learning_rate": 0.00030060537268255773, + "loss": 3.2938, + "theoretical_loss": 3.606424510183343, + "tokens_seen": 1133510656 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.48765861988067627, + "objective/train/docs_used": 643613, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.259086847305298, + "objective/train/original_loss": 3.2590866088867188, + "objective/train/theoretical_loss": 3.6063469759307054, + "objective/train/tokens_used": 1154232800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2418700009584427, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499954223632812, + "objective/train/weighted_lm_loss": 3.421250104904175, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.957314133644104, + "theoretical_loss": 3.6063469759307054, + "tokens_seen": 1133772800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003004161937192584, + "loss": 3.3392, + "theoretical_loss": 3.606114510783391, + "tokens_seen": 1134559232 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003002270147559592, + "loss": 3.3265, + "theoretical_loss": 3.605804877894263, + "tokens_seen": 1135607808 + }, + { + "epoch": 0.41, + "learning_rate": 0.00030003783579265987, + "loss": 3.4029, + "theoretical_loss": 3.6054956107448124, + "tokens_seen": 1136656384 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.48545318841934204, + "objective/train/docs_used": 645222, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.836273431777954, + "objective/train/original_loss": 2.836273193359375, + "objective/train/theoretical_loss": 3.605379729699039, + "objective/train/tokens_used": 1157509600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2405441403388977, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049768090248108, + "objective/train/weighted_lm_loss": 2.977830648422241, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9613069891929626, + "theoretical_loss": 3.605379729699039, + "tokens_seen": 1137049600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029984865682936056, + "loss": 3.3281, + "theoretical_loss": 3.605186708566225, + "tokens_seen": 1137704960 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002996594778660613, + "loss": 3.33, + "theoretical_loss": 3.6048781705920105, + "tokens_seen": 1138753536 + }, + { + "epoch": 0.41, + "learning_rate": 0.000299470298902762, + "loss": 3.3641, + "theoretical_loss": 3.6045699960579896, + "tokens_seen": 1139802112 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.4913421869277954, + "objective/train/docs_used": 646474, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3484857082366943, + "objective/train/original_loss": 3.3484854698181152, + "objective/train/theoretical_loss": 3.6044160448428775, + "objective/train/tokens_used": 1160786400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24510613083839417, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503802299499512, + "objective/train/weighted_lm_loss": 3.5174074172973633, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9515740871429443, + "theoretical_loss": 3.6044160448428775, + "tokens_seen": 1140326400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002992811199394627, + "loss": 3.3049, + "theoretical_loss": 3.60426218420229, + "tokens_seen": 1140850688 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029909194097616344, + "loss": 3.3031, + "theoretical_loss": 3.603954734265334, + "tokens_seen": 1141899264 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029890276201286414, + "loss": 3.3502, + "theoretical_loss": 3.60364764548983, + "tokens_seen": 1142947840 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.48611900210380554, + "objective/train/docs_used": 648095, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.445695638656616, + "objective/train/original_loss": 3.445695400238037, + "objective/train/theoretical_loss": 3.603455898071866, + "objective/train/tokens_used": 1164063200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2388918399810791, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498261451721191, + "objective/train/weighted_lm_loss": 3.6170575618743896, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9571871757507324, + "theoretical_loss": 3.603455898071866, + "tokens_seen": 1143603200 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029871358304956494, + "loss": 3.3775, + "theoretical_loss": 3.6033409171207644, + "tokens_seen": 1143996416 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029852440408626563, + "loss": 3.2954, + "theoretical_loss": 3.6030345484053923, + "tokens_seen": 1145044992 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002983352251229663, + "loss": 3.3485, + "theoretical_loss": 3.602728538593227, + "tokens_seen": 1146093568 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.47712090611457825, + "objective/train/docs_used": 649861, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.083465576171875, + "objective/train/original_loss": 3.083465337753296, + "objective/train/theoretical_loss": 3.6024992663141386, + "objective/train/tokens_used": 1167340000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2367585003376007, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489155054092407, + "objective/train/weighted_lm_loss": 3.231121301651001, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9521268010139465, + "theoretical_loss": 3.6024992663141386, + "tokens_seen": 1146880000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002981460461596671, + "loss": 3.2815, + "theoretical_loss": 3.6024228869360346, + "tokens_seen": 1147142144 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029795686719636777, + "loss": 3.3857, + "theoretical_loss": 3.602117592687822, + "tokens_seen": 1148190720 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002977676882330685, + "loss": 3.3291, + "theoretical_loss": 3.6018126551048306, + "tokens_seen": 1149239296 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.4793562889099121, + "objective/train/docs_used": 651918, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1603188514709473, + "objective/train/original_loss": 3.1603193283081055, + "objective/train/theoretical_loss": 3.601546126713652, + "objective/train/tokens_used": 1170616800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2417171150445938, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491641759872437, + "objective/train/weighted_lm_loss": 3.314483642578125, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.952265739440918, + "theoretical_loss": 3.601546126713652, + "tokens_seen": 1150156800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002975785092697692, + "loss": 3.3884, + "theoretical_loss": 3.6015080734455243, + "tokens_seen": 1150287872 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002973893303064699, + "loss": 3.4104, + "theoretical_loss": 3.601203846970585, + "tokens_seen": 1151336448 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029720015134317065, + "loss": 3.3489, + "theoretical_loss": 3.6008999749429007, + "tokens_seen": 1152385024 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.48654869198799133, + "objective/train/docs_used": 653958, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.908597946166992, + "objective/train/original_loss": 2.908597469329834, + "objective/train/theoretical_loss": 3.6005964566275575, + "objective/train/tokens_used": 1173893600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24034003913402557, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498765707015991, + "objective/train/weighted_lm_loss": 3.0537478923797607, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9514062404632568, + "theoretical_loss": 3.6005964566275575, + "tokens_seen": 1153433600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029701097237987134, + "loss": 3.3159, + "theoretical_loss": 3.6005964566275575, + "tokens_seen": 1153433600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029682179341657204, + "loss": 3.3387, + "theoretical_loss": 3.600293291291833, + "tokens_seen": 1154482176 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002966326144532728, + "loss": 3.3589, + "theoretical_loss": 3.5999904782051866, + "tokens_seen": 1155530752 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029644343548997353, + "loss": 3.4099, + "theoretical_loss": 3.5996880166392486, + "tokens_seen": 1156579328 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.4857807159423828, + "objective/train/docs_used": 655833, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2272090911865234, + "objective/train/original_loss": 3.2272090911865234, + "objective/train/theoretical_loss": 3.5996502336236142, + "objective/train/tokens_used": 1177170400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24008683860301971, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497984886169434, + "objective/train/weighted_lm_loss": 3.3878610134124756, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9605950117111206, + "theoretical_loss": 3.5996502336236142, + "tokens_seen": 1156710400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002962542565266743, + "loss": 3.3243, + "theoretical_loss": 3.599385905867816, + "tokens_seen": 1157627904 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029606507756337497, + "loss": 3.2962, + "theoretical_loss": 3.5990841451668416, + "tokens_seen": 1158676480 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029587589860007567, + "loss": 3.3185, + "theoretical_loss": 3.598782733814426, + "tokens_seen": 1159725056 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.4894789755344391, + "objective/train/docs_used": 657914, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.035065174102783, + "objective/train/original_loss": 3.035065174102783, + "objective/train/theoretical_loss": 3.5987074354776407, + "objective/train/tokens_used": 1180447200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24258311092853546, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501810312271118, + "objective/train/weighted_lm_loss": 3.187278985977173, + "objective/train/weights_max": 1.0512194633483887, + "objective/train/weights_min": 0.9514700770378113, + "theoretical_loss": 3.5987074354776407, + "tokens_seen": 1159987200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002956867196367764, + "loss": 3.2898, + "theoretical_loss": 3.598481671090809, + "tokens_seen": 1160773632 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002954975406734771, + "loss": 3.2237, + "theoretical_loss": 3.5981809562783633, + "tokens_seen": 1161822208 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029530836171017785, + "loss": 3.2926, + "theoretical_loss": 3.5978805886615834, + "tokens_seen": 1162870784 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.4899119734764099, + "objective/train/docs_used": 659757, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9733612537384033, + "objective/train/original_loss": 2.9733614921569824, + "objective/train/theoretical_loss": 3.597768040171002, + "objective/train/tokens_used": 1183724000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24554485082626343, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502394437789917, + "objective/train/weighted_lm_loss": 3.123426914215088, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9529068470001221, + "theoretical_loss": 3.597768040171002, + "tokens_seen": 1163264000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029511918274687855, + "loss": 3.2544, + "theoretical_loss": 3.5975805675270784, + "tokens_seen": 1163919360 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029493000378357924, + "loss": 3.3266, + "theoretical_loss": 3.5972808921635666, + "tokens_seen": 1164967936 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029474082482028, + "loss": 3.3658, + "theoretical_loss": 3.5969815618618615, + "tokens_seen": 1166016512 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.4765141010284424, + "objective/train/docs_used": 661442, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5711071491241455, + "objective/train/original_loss": 3.571107864379883, + "objective/train/theoretical_loss": 3.5968320258881388, + "objective/train/tokens_used": 1187000800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2353520691394806, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488474369049072, + "objective/train/weighted_lm_loss": 3.7464940547943115, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9515637755393982, + "theoretical_loss": 3.5968320258881388, + "tokens_seen": 1166540800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002945516458569807, + "loss": 3.3019, + "theoretical_loss": 3.5966825759148704, + "tokens_seen": 1167065088 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002943624668936814, + "loss": 3.3644, + "theoretical_loss": 3.5963839336175814, + "tokens_seen": 1168113664 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002941732879303822, + "loss": 3.3195, + "theoretical_loss": 3.596085634267058, + "tokens_seen": 1169162240 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.48476114869117737, + "objective/train/docs_used": 663194, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.447826385498047, + "objective/train/original_loss": 3.447826862335205, + "objective/train/theoretical_loss": 3.595899371014127, + "objective/train/tokens_used": 1190277600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23816771805286407, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496866703033447, + "objective/train/weighted_lm_loss": 3.619957208633423, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9524836540222168, + "theoretical_loss": 3.595899371014127, + "tokens_seen": 1169817600 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029398410896708287, + "loss": 3.4283, + "theoretical_loss": 3.5957876771624298, + "tokens_seen": 1170210816 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002937949300037836, + "loss": 3.3523, + "theoretical_loss": 3.5954900616048855, + "tokens_seen": 1171259392 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002936057510404843, + "loss": 3.3727, + "theoretical_loss": 3.5951927868976643, + "tokens_seen": 1172307968 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.4848986566066742, + "objective/train/docs_used": 665258, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.420736312866211, + "objective/train/original_loss": 3.4207358360290527, + "objective/train/theoretical_loss": 3.594970054132281, + "objective/train/tokens_used": 1193554400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23861315846443176, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049702763557434, + "objective/train/weighted_lm_loss": 3.590325355529785, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9676954746246338, + "theoretical_loss": 3.594970054132281, + "tokens_seen": 1173094400 + }, + { + "epoch": 0.42, + "learning_rate": 0.000293416572077185, + "loss": 3.3649, + "theoretical_loss": 3.5948958523460495, + "tokens_seen": 1173356544 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029322739311388575, + "loss": 3.281, + "theoretical_loss": 3.5945992572573577, + "tokens_seen": 1174405120 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029303821415058645, + "loss": 3.3138, + "theoretical_loss": 3.5943030009409345, + "tokens_seen": 1175453696 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.47306984663009644, + "objective/train/docs_used": 667248, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9362661838531494, + "objective/train/original_loss": 2.936265707015991, + "objective/train/theoretical_loss": 3.594044054021782, + "objective/train/tokens_used": 1196831200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23313362896442413, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484918355941772, + "objective/train/weighted_lm_loss": 3.077251434326172, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9524040818214417, + "theoretical_loss": 3.594044054021782, + "tokens_seen": 1176371200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002928490351872872, + "loss": 3.3222, + "theoretical_loss": 3.5940070827081443, + "tokens_seen": 1176502272 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002926598562239879, + "loss": 3.3223, + "theoretical_loss": 3.593711501872364, + "tokens_seen": 1177550848 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002924706772606886, + "loss": 3.3199, + "theoretical_loss": 3.5934162577489746, + "tokens_seen": 1178599424 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.48706483840942383, + "objective/train/docs_used": 669029, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9470417499542236, + "objective/train/original_loss": 2.9470419883728027, + "objective/train/theoretical_loss": 3.5931213496553536, + "objective/train/tokens_used": 1200108000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2409461885690689, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499311685562134, + "objective/train/weighted_lm_loss": 3.094271421432495, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9517722725868225, + "theoretical_loss": 3.5931213496553536, + "tokens_seen": 1179648000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029228149829738933, + "loss": 3.3198, + "theoretical_loss": 3.5931213496553536, + "tokens_seen": 1179648000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029209231933409, + "loss": 3.397, + "theoretical_loss": 3.5928267769108677, + "tokens_seen": 1180696576 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002919031403707908, + "loss": 3.2854, + "theoretical_loss": 3.5925325388368656, + "tokens_seen": 1181745152 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002917139614074915, + "loss": 3.2747, + "theoretical_loss": 3.5922386347566695, + "tokens_seen": 1182793728 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.4930936098098755, + "objective/train/docs_used": 670515, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0000312328338623, + "objective/train/original_loss": 3.000030994415283, + "objective/train/theoretical_loss": 3.592201920196959, + "objective/train/tokens_used": 1203384800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24434438347816467, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050551414489746, + "objective/train/weighted_lm_loss": 3.1516330242156982, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9934404492378235, + "theoretical_loss": 3.592201920196959, + "tokens_seen": 1182924800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002915247824441922, + "loss": 3.2816, + "theoretical_loss": 3.591945063995568, + "tokens_seen": 1183842304 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029133560348089296, + "loss": 3.2813, + "theoretical_loss": 3.591651825880809, + "tokens_seen": 1184890880 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029114642451759365, + "loss": 3.2885, + "theoretical_loss": 3.591358919741592, + "tokens_seen": 1185939456 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.4882916212081909, + "objective/train/docs_used": 672504, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.118218421936035, + "objective/train/original_loss": 3.118218421936035, + "objective/train/theoretical_loss": 3.591285744999542, + "objective/train/tokens_used": 1206661600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.242011159658432, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500593185424805, + "objective/train/weighted_lm_loss": 3.27449107170105, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9519306421279907, + "theoretical_loss": 3.591285744999542, + "tokens_seen": 1186201600 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029095724555429435, + "loss": 3.3389, + "theoretical_loss": 3.591066344909062, + "tokens_seen": 1186988032 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002907680665909951, + "loss": 3.4149, + "theoretical_loss": 3.590774100716298, + "tokens_seen": 1188036608 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002905788876276958, + "loss": 3.3775, + "theoretical_loss": 3.5904821864983116, + "tokens_seen": 1189085184 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.49263498187065125, + "objective/train/docs_used": 674473, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1062722206115723, + "objective/train/original_loss": 3.1062724590301514, + "objective/train/theoretical_loss": 3.590372803602795, + "objective/train/tokens_used": 1209938400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24459204077720642, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050506830215454, + "objective/train/weighted_lm_loss": 3.2633543014526367, + "objective/train/weights_max": 1.0512193441390991, + "objective/train/weights_min": 0.9603816866874695, + "theoretical_loss": 3.590372803602795, + "tokens_seen": 1189478400 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029038970866439653, + "loss": 3.3122, + "theoretical_loss": 3.5901906015920355, + "tokens_seen": 1190133760 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029020052970109723, + "loss": 3.3855, + "theoretical_loss": 3.5898993453363173, + "tokens_seen": 1191182336 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002900113507377979, + "loss": 3.3267, + "theoretical_loss": 3.5896084170719127, + "tokens_seen": 1192230912 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.4631289541721344, + "objective/train/docs_used": 676257, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0787904262542725, + "objective/train/original_loss": 3.0787906646728516, + "objective/train/theoretical_loss": 3.589463075730959, + "objective/train/tokens_used": 1213215200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23281329870224, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0474958419799805, + "objective/train/weighted_lm_loss": 3.2263834476470947, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9525740742683411, + "theoretical_loss": 3.589463075730959, + "tokens_seen": 1192755200 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028982217177449867, + "loss": 3.3403, + "theoretical_loss": 3.5893178161414783, + "tokens_seen": 1193279488 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002896329928111994, + "loss": 3.3054, + "theoretical_loss": 3.589027541889564, + "tokens_seen": 1194328064 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028944381384790016, + "loss": 3.3027, + "theoretical_loss": 3.5887375936626067, + "tokens_seen": 1195376640 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.47915610671043396, + "objective/train/docs_used": 678049, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.249417304992676, + "objective/train/original_loss": 3.249417304992676, + "objective/train/theoretical_loss": 3.5885565412906617, + "objective/train/tokens_used": 1216492000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2335328310728073, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049102544784546, + "objective/train/weighted_lm_loss": 3.4090421199798584, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9726335406303406, + "theoretical_loss": 3.5885565412906617, + "tokens_seen": 1196032000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028925463488460086, + "loss": 3.3897, + "theoretical_loss": 3.5884479708089216, + "tokens_seen": 1196425216 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028906545592130155, + "loss": 3.2624, + "theoretical_loss": 3.5881586726786976, + "tokens_seen": 1197473792 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002888762769580023, + "loss": 3.3199, + "theoretical_loss": 3.587869698623987, + "tokens_seen": 1198522368 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.48205363750457764, + "objective/train/docs_used": 679914, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.01973557472229, + "objective/train/original_loss": 3.019735813140869, + "objective/train/theoretical_loss": 3.5876531803687786, + "objective/train/tokens_used": 1219768800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2353292852640152, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494015216827393, + "objective/train/weighted_lm_loss": 3.1688785552978516, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9905664920806885, + "theoretical_loss": 3.5876531803687786, + "tokens_seen": 1199308800 + }, + { + "epoch": 0.43, + "learning_rate": 0.000288687097994703, + "loss": 3.2923, + "theoretical_loss": 3.587581047998703, + "tokens_seen": 1199570944 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002884979190314037, + "loss": 3.3503, + "theoretical_loss": 3.587292720158608, + "tokens_seen": 1200619520 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028830874006810443, + "loss": 3.3273, + "theoretical_loss": 3.58700471446131, + "tokens_seen": 1201668096 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.49034416675567627, + "objective/train/docs_used": 681814, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.349426031112671, + "objective/train/original_loss": 3.3494255542755127, + "objective/train/theoretical_loss": 3.5867529732303307, + "objective/train/tokens_used": 1223045600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24394987523555756, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502744913101196, + "objective/train/weighted_lm_loss": 3.517376184463501, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9536815285682678, + "theoretical_loss": 3.5867529732303307, + "tokens_seen": 1202585600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002881195611048051, + "loss": 3.3424, + "theoretical_loss": 3.5867170302662537, + "tokens_seen": 1202716672 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002879303821415059, + "loss": 3.3676, + "theoretical_loss": 3.586429666934716, + "tokens_seen": 1203765248 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028774120317820657, + "loss": 3.3343, + "theoretical_loss": 3.5861426238297964, + "tokens_seen": 1204813824 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.4898318946361542, + "objective/train/docs_used": 683756, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4327945709228516, + "objective/train/original_loss": 3.4327945709228516, + "objective/train/theoretical_loss": 3.585855900316411, + "objective/train/tokens_used": 1226322400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24356377124786377, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502212047576904, + "objective/train/weighted_lm_loss": 3.606135368347168, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9553044438362122, + "theoretical_loss": 3.585855900316411, + "tokens_seen": 1205862400 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028755202421490726, + "loss": 3.354, + "theoretical_loss": 3.585855900316411, + "tokens_seen": 1205862400 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028736284525160806, + "loss": 3.3778, + "theoretical_loss": 3.5855694957612894, + "tokens_seen": 1206910976 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028717366628830876, + "loss": 3.3674, + "theoretical_loss": 3.58528340953296, + "tokens_seen": 1207959552 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002869844873250095, + "loss": 3.348, + "theoretical_loss": 3.5849976410017526, + "tokens_seen": 1209008128 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.4933568835258484, + "objective/train/docs_used": 685102, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0827248096466064, + "objective/train/original_loss": 3.0827245712280273, + "objective/train/theoretical_loss": 3.5849619422421393, + "objective/train/tokens_used": 1229599200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24562005698680878, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050584316253662, + "objective/train/weighted_lm_loss": 3.2384722232818604, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9516139626502991, + "theoretical_loss": 3.5849619422421393, + "tokens_seen": 1209139200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002867953083617102, + "loss": 3.3845, + "theoretical_loss": 3.5847121895397844, + "tokens_seen": 1210056704 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002866061293984109, + "loss": 3.3672, + "theoretical_loss": 3.5844270545209582, + "tokens_seen": 1211105280 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028641695043511164, + "loss": 3.4141, + "theoretical_loss": 3.584142235320952, + "tokens_seen": 1212153856 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.493893563747406, + "objective/train/docs_used": 686738, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.313899517059326, + "objective/train/original_loss": 3.313899517059326, + "objective/train/theoretical_loss": 3.584071079794647, + "objective/train/tokens_used": 1232876000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24534015357494354, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506365299224854, + "objective/train/weighted_lm_loss": 3.4817614555358887, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9518520832061768, + "theoretical_loss": 3.584071079794647, + "tokens_seen": 1212416000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028622777147181233, + "loss": 3.381, + "theoretical_loss": 3.5838577313172157, + "tokens_seen": 1213202432 + }, + { + "epoch": 0.43, + "learning_rate": 0.000286038592508513, + "loss": 3.3996, + "theoretical_loss": 3.5835735418889616, + "tokens_seen": 1214251008 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002858494135452138, + "loss": 3.2881, + "theoretical_loss": 3.583289666417161, + "tokens_seen": 1215299584 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.4792419672012329, + "objective/train/docs_used": 688520, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3605501651763916, + "objective/train/original_loss": 3.3605504035949707, + "objective/train/theoretical_loss": 3.583183293931091, + "objective/train/tokens_used": 1236152800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.241739884018898, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491528511047363, + "objective/train/weighted_lm_loss": 3.524282693862915, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9515173435211182, + "theoretical_loss": 3.583183293931091, + "tokens_seen": 1215692800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00028566023458191447, + "loss": 3.3747, + "theoretical_loss": 3.5830061042845363, + "tokens_seen": 1216348160 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002854710556186152, + "loss": 3.3662, + "theoretical_loss": 3.582722854875552, + "tokens_seen": 1217396736 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002852818766553159, + "loss": 3.381, + "theoretical_loss": 3.5824399175764126, + "tokens_seen": 1218445312 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.47432267665863037, + "objective/train/docs_used": 690313, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.373595714569092, + "objective/train/original_loss": 3.373595714569092, + "objective/train/theoretical_loss": 3.5822985657766973, + "objective/train/tokens_used": 1239429600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23132449388504028, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486078262329102, + "objective/train/weighted_lm_loss": 3.5375053882598877, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9576109647750854, + "theoretical_loss": 3.5822985657766973, + "tokens_seen": 1218969600 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028509269769201666, + "loss": 3.3976, + "theoretical_loss": 3.5821572917750535, + "tokens_seen": 1219493888 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002849035187287174, + "loss": 3.3473, + "theoretical_loss": 3.5818749768611364, + "tokens_seen": 1220542464 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002847143397654181, + "loss": 3.3782, + "theoretical_loss": 3.5815929722260402, + "tokens_seen": 1221591040 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.4889954626560211, + "objective/train/docs_used": 692066, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.303687572479248, + "objective/train/original_loss": 3.303687572479248, + "objective/train/theoretical_loss": 3.5814168766228267, + "objective/train/tokens_used": 1242706400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24146433174610138, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501270294189453, + "objective/train/weighted_lm_loss": 3.468892812728882, + "objective/train/weights_max": 1.0512158870697021, + "objective/train/weights_min": 0.9796527028083801, + "theoretical_loss": 3.5814168766228267, + "tokens_seen": 1222246400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028452516080211884, + "loss": 3.3363, + "theoretical_loss": 3.5813112772628575, + "tokens_seen": 1222639616 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028433598183881954, + "loss": 3.3338, + "theoretical_loss": 3.581029891366387, + "tokens_seen": 1223688192 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028414680287552023, + "loss": 3.2845, + "theoretical_loss": 3.5807488139331274, + "tokens_seen": 1224736768 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.4919726550579071, + "objective/train/docs_used": 694202, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8666977882385254, + "objective/train/original_loss": 2.8666977882385254, + "objective/train/theoretical_loss": 3.580538207925077, + "objective/train/tokens_used": 1245983200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2440943568944931, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504380464553833, + "objective/train/weighted_lm_loss": 3.0116186141967773, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9593926668167114, + "theoretical_loss": 3.580538207925077, + "tokens_seen": 1225523200 + }, + { + "epoch": 0.44, + "learning_rate": 0.000283957623912221, + "loss": 3.365, + "theoretical_loss": 3.5804680443612718, + "tokens_seen": 1225785344 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028376844494892167, + "loss": 3.3101, + "theoretical_loss": 3.5801875820506988, + "tokens_seen": 1226833920 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028357926598562237, + "loss": 3.2876, + "theoretical_loss": 3.579907426402972, + "tokens_seen": 1227882496 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.47936928272247314, + "objective/train/docs_used": 696236, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1124532222747803, + "objective/train/original_loss": 3.1124534606933594, + "objective/train/theoretical_loss": 3.579662541301401, + "objective/train/tokens_used": 1249260000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2382909506559372, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491482019424438, + "objective/train/weighted_lm_loss": 3.266861915588379, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9547244310379028, + "theoretical_loss": 3.579662541301401, + "tokens_seen": 1228800000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002833900870223231, + "loss": 3.3348, + "theoretical_loss": 3.579627576821328, + "tokens_seen": 1228931072 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002832009080590238, + "loss": 3.3281, + "theoretical_loss": 3.579348032710672, + "tokens_seen": 1229979648 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028301172909572455, + "loss": 3.349, + "theoretical_loss": 3.5790687934775747, + "tokens_seen": 1231028224 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.47580137848854065, + "objective/train/docs_used": 697487, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3753488063812256, + "objective/train/original_loss": 3.3753488063812256, + "objective/train/theoretical_loss": 3.5787898585302615, + "objective/train/tokens_used": 1252536800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2382103055715561, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487911701202393, + "objective/train/weighted_lm_loss": 3.5415258407592773, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9519994854927063, + "theoretical_loss": 3.5787898585302615, + "tokens_seen": 1232076800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002828225501324253, + "loss": 3.2565, + "theoretical_loss": 3.5787898585302615, + "tokens_seen": 1232076800 + }, + { + "epoch": 0.44, + "learning_rate": 0.000282633371169126, + "loss": 3.3331, + "theoretical_loss": 3.57851122727861, + "tokens_seen": 1233125376 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028244419220582674, + "loss": 3.2767, + "theoretical_loss": 3.578232899134143, + "tokens_seen": 1234173952 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028225501324252744, + "loss": 3.286, + "theoretical_loss": 3.5779548735100217, + "tokens_seen": 1235222528 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.4829152226448059, + "objective/train/docs_used": 699233, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.290647506713867, + "objective/train/original_loss": 3.2906479835510254, + "objective/train/theoretical_loss": 3.577920141548805, + "objective/train/tokens_used": 1255813600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24048534035682678, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495140552520752, + "objective/train/weighted_lm_loss": 3.452521562576294, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9529663920402527, + "theoretical_loss": 3.577920141548805, + "tokens_seen": 1235353600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002820658342792282, + "loss": 3.2778, + "theoretical_loss": 3.5776771498210413, + "tokens_seen": 1236271104 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002818766553159289, + "loss": 3.252, + "theoretical_loss": 3.5773997274836224, + "tokens_seen": 1237319680 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028168747635262957, + "loss": 3.3259, + "theoretical_loss": 3.577122605915809, + "tokens_seen": 1238368256 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.479489803314209, + "objective/train/docs_used": 700968, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0772054195404053, + "objective/train/original_loss": 3.0772056579589844, + "objective/train/theoretical_loss": 3.5770533724510627, + "objective/train/tokens_used": 1259090400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23797892034053802, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491585731506348, + "objective/train/weighted_lm_loss": 3.22639536857605, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9590162038803101, + "theoretical_loss": 3.5770533724510627, + "tokens_seen": 1238630400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002814982973893303, + "loss": 3.2479, + "theoretical_loss": 3.5768457845372597, + "tokens_seen": 1239416832 + }, + { + "epoch": 0.44, + "learning_rate": 0.000281309118426031, + "loss": 3.2973, + "theoretical_loss": 3.576569262769242, + "tokens_seen": 1240465408 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028111993946273176, + "loss": 3.2963, + "theoretical_loss": 3.576293040034628, + "tokens_seen": 1241513984 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.48357242345809937, + "objective/train/docs_used": 702465, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.286665916442871, + "objective/train/original_loss": 3.286665916442871, + "objective/train/theoretical_loss": 3.576189533486179, + "objective/train/tokens_used": 1262367200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23855482041835785, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049569845199585, + "objective/train/weighted_lm_loss": 3.4492075443267822, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9514651298522949, + "theoretical_loss": 3.576189533486179, + "tokens_seen": 1241907200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028093076049943245, + "loss": 3.3817, + "theoretical_loss": 3.576017115757886, + "tokens_seen": 1242562560 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028074158153613315, + "loss": 3.3114, + "theoretical_loss": 3.57574148936508, + "tokens_seen": 1243611136 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028055240257283395, + "loss": 3.319, + "theoretical_loss": 3.575466160283857, + "tokens_seen": 1244659712 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.484239399433136, + "objective/train/docs_used": 704525, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2964892387390137, + "objective/train/original_loss": 3.2964892387390137, + "objective/train/theoretical_loss": 3.5753286070566617, + "objective/train/tokens_used": 1265644000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24136582016944885, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496509075164795, + "objective/train/weighted_lm_loss": 3.460113763809204, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9523569941520691, + "theoretical_loss": 3.5753286070566617, + "tokens_seen": 1245184000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00028036322360953464, + "loss": 3.3019, + "theoretical_loss": 3.575191127943446, + "tokens_seen": 1245708288 + }, + { + "epoch": 0.45, + "learning_rate": 0.00028017404464623534, + "loss": 3.2625, + "theoretical_loss": 3.574916391774651, + "tokens_seen": 1246756864 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002799848656829361, + "loss": 3.2362, + "theoretical_loss": 3.5746419512098457, + "tokens_seen": 1247805440 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.4908085763454437, + "objective/train/docs_used": 706749, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0369081497192383, + "objective/train/original_loss": 3.0369081497192383, + "objective/train/theoretical_loss": 3.5744705757166564, + "objective/train/tokens_used": 1268920800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24289442598819733, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050315499305725, + "objective/train/weighted_lm_loss": 3.190009593963623, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9634060263633728, + "theoretical_loss": 3.5744705757166564, + "tokens_seen": 1248460800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002797956867196368, + "loss": 3.281, + "theoretical_loss": 3.574367805682967, + "tokens_seen": 1248854016 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002796065077563375, + "loss": 3.2693, + "theoretical_loss": 3.57409395462951, + "tokens_seen": 1249902592 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002794173287930382, + "loss": 3.1934, + "theoretical_loss": 3.5738203974865224, + "tokens_seen": 1250951168 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.4866064190864563, + "objective/train/docs_used": 708687, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3080501556396484, + "objective/train/original_loss": 3.3080499172210693, + "objective/train/theoretical_loss": 3.5736154221702483, + "objective/train/tokens_used": 1272197600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23975151777267456, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498794317245483, + "objective/train/weighted_lm_loss": 3.472594976425171, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9527500867843628, + "theoretical_loss": 3.5736154221702483, + "tokens_seen": 1251737600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002792281498297389, + "loss": 3.2981, + "theoretical_loss": 3.5735471336925984, + "tokens_seen": 1251999744 + }, + { + "epoch": 0.45, + "learning_rate": 0.00027903897086643966, + "loss": 3.2379, + "theoretical_loss": 3.5732741626878743, + "tokens_seen": 1253048320 + }, + { + "epoch": 0.45, + "learning_rate": 0.00027884979190314035, + "loss": 3.2213, + "theoretical_loss": 3.5730014839140223, + "tokens_seen": 1254096896 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.460138201713562, + "objective/train/docs_used": 710568, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8919012546539307, + "objective/train/original_loss": 2.8919010162353516, + "objective/train/theoretical_loss": 3.5727631292697843, + "objective/train/tokens_used": 1275474400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2265445441007614, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.047165036201477, + "objective/train/weighted_lm_loss": 3.0274980068206787, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9611805081367493, + "theoretical_loss": 3.5727631292697843, + "tokens_seen": 1255014400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002786606129398411, + "loss": 3.1932, + "theoretical_loss": 3.5727290968142444, + "tokens_seen": 1255145472 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002784714339765418, + "loss": 3.2294, + "theoretical_loss": 3.572457000833267, + "tokens_seen": 1256194048 + }, + { + "epoch": 0.45, + "learning_rate": 0.00027828225501324254, + "loss": 3.2202, + "theoretical_loss": 3.5721851954173376, + "tokens_seen": 1257242624 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.4938296675682068, + "objective/train/docs_used": 712328, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2921712398529053, + "objective/train/original_loss": 3.292171001434326, + "objective/train/theoretical_loss": 3.571913680014217, + "objective/train/tokens_used": 1278751200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2448359876871109, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050627589225769, + "objective/train/weighted_lm_loss": 3.4590396881103516, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9618917107582092, + "theoretical_loss": 3.571913680014217, + "tokens_seen": 1258291200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002780930760499433, + "loss": 3.2465, + "theoretical_loss": 3.571913680014217, + "tokens_seen": 1258291200 + }, + { + "epoch": 0.45, + "learning_rate": 0.000277903897086644, + "loss": 3.216, + "theoretical_loss": 3.5716424540731735, + "tokens_seen": 1259339776 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002777147181233447, + "loss": 3.2931, + "theoretical_loss": 3.571371517044981, + "tokens_seen": 1260388352 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002775255391600454, + "loss": 3.2458, + "theoretical_loss": 3.571100868381909, + "tokens_seen": 1261436928 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.4796658754348755, + "objective/train/docs_used": 713944, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1429102420806885, + "objective/train/original_loss": 3.1429104804992676, + "objective/train/theoretical_loss": 3.5710670575474763, + "objective/train/tokens_used": 1282028000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23792652785778046, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491758584976196, + "objective/train/weighted_lm_loss": 3.297544240951538, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9516644477844238, + "theoretical_loss": 3.5710670575474763, + "tokens_seen": 1261568000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002773363601967461, + "loss": 3.2961, + "theoretical_loss": 3.5708305075377207, + "tokens_seen": 1262485504 + }, + { + "epoch": 0.45, + "learning_rate": 0.00027714718123344686, + "loss": 3.3117, + "theoretical_loss": 3.5705604339676666, + "tokens_seen": 1263534080 + }, + { + "epoch": 0.45, + "learning_rate": 0.00027695800227014756, + "loss": 3.2667, + "theoretical_loss": 3.57029064712848, + "tokens_seen": 1264582656 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.49083083868026733, + "objective/train/docs_used": 715923, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.367497444152832, + "objective/train/original_loss": 3.367497444152832, + "objective/train/theoretical_loss": 3.570223245156858, + "objective/train/tokens_used": 1285304800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2441437840461731, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503242015838623, + "objective/train/weighted_lm_loss": 3.536487102508545, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9521356821060181, + "theoretical_loss": 3.570223245156858, + "tokens_seen": 1264844800 + }, + { + "epoch": 0.45, + "learning_rate": 0.00027676882330684825, + "loss": 3.3079, + "theoretical_loss": 3.5700211464783687, + "tokens_seen": 1265631232 + }, + { + "epoch": 0.45, + "learning_rate": 0.000276579644343549, + "loss": 3.2347, + "theoretical_loss": 3.5697519314770148, + "tokens_seen": 1266679808 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002763904653802497, + "loss": 3.3075, + "theoretical_loss": 3.5694830015855636, + "tokens_seen": 1267728384 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.4871194660663605, + "objective/train/docs_used": 717630, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2038087844848633, + "objective/train/original_loss": 3.2038087844848633, + "objective/train/theoretical_loss": 3.569382226271438, + "objective/train/tokens_used": 1288581600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24120700359344482, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499380826950073, + "objective/train/weighted_lm_loss": 3.3642735481262207, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9537880420684814, + "theoretical_loss": 3.569382226271438, + "tokens_seen": 1268121600 + }, + { + "epoch": 0.45, + "learning_rate": 0.00027620128641695044, + "loss": 3.2755, + "theoretical_loss": 3.569214356266625, + "tokens_seen": 1268776960 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002760121074536512, + "loss": 3.2224, + "theoretical_loss": 3.5689459949842623, + "tokens_seen": 1269825536 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002758229284903519, + "loss": 3.2175, + "theoretical_loss": 3.5686779172039906, + "tokens_seen": 1270874112 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.49091073870658875, + "objective/train/docs_used": 719315, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.267247438430786, + "objective/train/original_loss": 3.267247200012207, + "objective/train/theoretical_loss": 3.568543984460508, + "objective/train/tokens_used": 1291858400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24294719099998474, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503261089324951, + "objective/train/weighted_lm_loss": 3.4315378665924072, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9729084372520447, + "theoretical_loss": 3.568543984460508, + "tokens_seen": 1271398400 + }, + { + "epoch": 0.45, + "learning_rate": 0.00027563374952705263, + "loss": 3.2281, + "theoretical_loss": 3.5684101223927702, + "tokens_seen": 1271922688 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002754445705637533, + "loss": 3.2489, + "theoretical_loss": 3.568142610019003, + "tokens_seen": 1272971264 + }, + { + "epoch": 0.46, + "learning_rate": 0.000275255391600454, + "loss": 3.2501, + "theoretical_loss": 3.567875379552525, + "tokens_seen": 1274019840 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.4882226884365082, + "objective/train/docs_used": 720761, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1280031204223633, + "objective/train/original_loss": 3.1280031204223633, + "objective/train/theoretical_loss": 3.5677085034320273, + "objective/train/tokens_used": 1295135200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24101290106773376, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500473976135254, + "objective/train/weighted_lm_loss": 3.284193992614746, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9808136820793152, + "theoretical_loss": 3.5677085034320273, + "tokens_seen": 1274675200 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027506621263715476, + "loss": 3.275, + "theoretical_loss": 3.567608430464604, + "tokens_seen": 1275068416 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027487703367385546, + "loss": 3.2107, + "theoretical_loss": 3.567341762227932, + "tokens_seen": 1276116992 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002746878547105562, + "loss": 3.2512, + "theoretical_loss": 3.567075374316623, + "tokens_seen": 1277165568 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.4728912115097046, + "objective/train/docs_used": 722731, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8755643367767334, + "objective/train/original_loss": 2.8755640983581543, + "objective/train/theoretical_loss": 3.566875767031105, + "objective/train/tokens_used": 1298412000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23524631559848785, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484849214553833, + "objective/train/weighted_lm_loss": 3.0170345306396484, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9677127599716187, + "theoretical_loss": 3.566875767031105, + "tokens_seen": 1277952000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002744986757472569, + "loss": 3.2255, + "theoretical_loss": 3.5668092662062048, + "tokens_seen": 1278214144 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002743094967839576, + "loss": 3.2771, + "theoretical_loss": 3.566543437373617, + "tokens_seen": 1279262720 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027412031782065834, + "loss": 3.2012, + "theoretical_loss": 3.5662778872972036, + "tokens_seen": 1280311296 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.4915541112422943, + "objective/train/docs_used": 724789, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.916830062866211, + "objective/train/original_loss": 2.916830062866211, + "objective/train/theoretical_loss": 3.5660457592384924, + "objective/train/tokens_used": 1301688800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24279290437698364, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503896474838257, + "objective/train/weighted_lm_loss": 3.063872814178467, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9648842215538025, + "theoretical_loss": 3.5660457592384924, + "tokens_seen": 1281228800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027393113885735903, + "loss": 3.2504, + "theoretical_loss": 3.56601261545671, + "tokens_seen": 1281359872 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027374195989405983, + "loss": 3.1818, + "theoretical_loss": 3.565747621333277, + "tokens_seen": 1282408448 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027355278093076053, + "loss": 3.1748, + "theoretical_loss": 3.565482904409436, + "tokens_seen": 1283457024 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.49075672030448914, + "objective/train/docs_used": 727075, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.278653860092163, + "objective/train/original_loss": 3.278654098510742, + "objective/train/theoretical_loss": 3.5652184641691047, + "objective/train/tokens_used": 1304965600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24346491694450378, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503132343292236, + "objective/train/weighted_lm_loss": 3.4435787200927734, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9591943025588989, + "theoretical_loss": 3.5652184641691047, + "tokens_seen": 1284505600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002733636019674612, + "loss": 3.2211, + "theoretical_loss": 3.5652184641691047, + "tokens_seen": 1284505600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027317442300416197, + "loss": 3.2112, + "theoretical_loss": 3.5649543000975825, + "tokens_seen": 1285554176 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027298524404086266, + "loss": 3.2327, + "theoretical_loss": 3.564690411681543, + "tokens_seen": 1286602752 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027279606507756336, + "loss": 3.1368, + "theoretical_loss": 3.564426798409034, + "tokens_seen": 1287651328 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.49102783203125, + "objective/train/docs_used": 728554, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.136857032775879, + "objective/train/original_loss": 3.136857032775879, + "objective/train/theoretical_loss": 3.5643938660705556, + "objective/train/tokens_used": 1308242400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2425263375043869, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503356456756592, + "objective/train/weighted_lm_loss": 3.294790029525757, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9526902437210083, + "theoretical_loss": 3.5643938660705556, + "tokens_seen": 1287782400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002726068861142641, + "loss": 3.2618, + "theoretical_loss": 3.5641634597694685, + "tokens_seen": 1288699904 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002724177071509648, + "loss": 3.2774, + "theoretical_loss": 3.5639003952536212, + "tokens_seen": 1289748480 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027222852818766554, + "loss": 3.2149, + "theoretical_loss": 3.563637604353625, + "tokens_seen": 1290797056 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.4924575090408325, + "objective/train/docs_used": 730680, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1504783630371094, + "objective/train/original_loss": 3.1504788398742676, + "objective/train/theoretical_loss": 3.5635719493217155, + "objective/train/tokens_used": 1311519200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2439342588186264, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504858493804932, + "objective/train/weighted_lm_loss": 3.309021234512329, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9914089441299438, + "theoretical_loss": 3.5635719493217155, + "tokens_seen": 1291059200 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027203934922436624, + "loss": 3.272, + "theoretical_loss": 3.563375086562964, + "tokens_seen": 1291845632 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027185017026106693, + "loss": 3.3122, + "theoretical_loss": 3.563112841376472, + "tokens_seen": 1292894208 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002716609912977677, + "loss": 3.254, + "theoretical_loss": 3.562850868290324, + "tokens_seen": 1293942784 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.4821608066558838, + "objective/train/docs_used": 732486, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.109811305999756, + "objective/train/original_loss": 3.109811305999756, + "objective/train/theoretical_loss": 3.5627526984312885, + "objective/train/tokens_used": 1314796000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2412458062171936, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049442172050476, + "objective/train/weighted_lm_loss": 3.2632744312286377, + "objective/train/weights_max": 1.0512189865112305, + "objective/train/weights_min": 0.9536179304122925, + "theoretical_loss": 3.5627526984312885, + "tokens_seen": 1294336000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002714718123344684, + "loss": 3.2944, + "theoretical_loss": 3.5625891668020353, + "tokens_seen": 1294991360 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002712826333711692, + "loss": 3.2955, + "theoretical_loss": 3.5623277364104537, + "tokens_seen": 1296039936 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027109345440786987, + "loss": 3.2216, + "theoretical_loss": 3.562066576615756, + "tokens_seen": 1297088512 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.4861340820789337, + "objective/train/docs_used": 734540, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.098299741744995, + "objective/train/original_loss": 3.098299503326416, + "objective/train/theoretical_loss": 3.5619360980364068, + "objective/train/tokens_used": 1318072800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23957262933254242, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498310327529907, + "objective/train/weighted_lm_loss": 3.252875328063965, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9518746733665466, + "theoretical_loss": 3.5619360980364068, + "tokens_seen": 1297612800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027090427544457056, + "loss": 3.1921, + "theoretical_loss": 3.5618056869194454, + "tokens_seen": 1298137088 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002707150964812713, + "loss": 3.2447, + "theoretical_loss": 3.561545066824343, + "tokens_seen": 1299185664 + }, + { + "epoch": 0.46, + "learning_rate": 0.000270525917517972, + "loss": 3.2583, + "theoretical_loss": 3.561284715834587, + "tokens_seen": 1300234240 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.4824850559234619, + "objective/train/docs_used": 736491, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3983359336853027, + "objective/train/original_loss": 3.3983354568481445, + "objective/train/theoretical_loss": 3.5611221329012466, + "objective/train/tokens_used": 1321349600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24269729852676392, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049481987953186, + "objective/train/weighted_lm_loss": 3.5662224292755127, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9514743089675903, + "theoretical_loss": 3.5611221329012466, + "tokens_seen": 1300889600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00027033673855467275, + "loss": 3.2885, + "theoretical_loss": 3.5610246334556255, + "tokens_seen": 1301282816 + }, + { + "epoch": 0.47, + "learning_rate": 0.00027014755959137344, + "loss": 3.2482, + "theoretical_loss": 3.5607648191942145, + "tokens_seen": 1302331392 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026995838062807414, + "loss": 3.2405, + "theoretical_loss": 3.56050527255841, + "tokens_seen": 1303379968 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.49545571208000183, + "objective/train/docs_used": 738318, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4414329528808594, + "objective/train/original_loss": 3.4414329528808594, + "objective/train/theoretical_loss": 3.5603107879156584, + "objective/train/tokens_used": 1324626400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2457858920097351, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050795078277588, + "objective/train/weighted_lm_loss": 3.6163370609283447, + "objective/train/weights_max": 1.0512193441390991, + "objective/train/weights_min": 1.01546049118042, + "theoretical_loss": 3.5603107879156584, + "tokens_seen": 1304166400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002697692016647749, + "loss": 3.3299, + "theoretical_loss": 3.560245993057567, + "tokens_seen": 1304428544 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002695800227014756, + "loss": 3.2857, + "theoretical_loss": 3.5599869802023325, + "tokens_seen": 1305477120 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026939084373817627, + "loss": 3.2556, + "theoretical_loss": 3.5597282335046425, + "tokens_seen": 1306525696 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.4869755208492279, + "objective/train/docs_used": 740377, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0287930965423584, + "objective/train/original_loss": 3.0287928581237793, + "objective/train/theoretical_loss": 3.5595020480938198, + "objective/train/tokens_used": 1327903200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24304500222206116, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049932837486267, + "objective/train/weighted_lm_loss": 3.180659294128418, + "objective/train/weights_max": 1.0512193441390991, + "objective/train/weights_min": 0.952156662940979, + "theoretical_loss": 3.5595020480938198, + "tokens_seen": 1307443200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002692016647748771, + "loss": 3.2115, + "theoretical_loss": 3.5594697524777175, + "tokens_seen": 1307574272 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026901248581157777, + "loss": 3.2641, + "theoretical_loss": 3.559211536636057, + "tokens_seen": 1308622848 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002688233068482785, + "loss": 3.206, + "theoretical_loss": 3.5589535854954364, + "tokens_seen": 1309671424 + }, + { + "debugging/Self-BLEU-5": 0.49020908264157476, + "debugging/distinct-1-grams": 0.768901113497886, + "debugging/distinct-2-grams": 0.9428782333551957, + "debugging/entropy-1-grams": 6.085999550681761, + "debugging/entropy-2-grams": 7.0033060167714964, + "debugging/length": 490.2352941176471, + "debugging/num_segments": 17, + "debugging/raw_token_scores_avg": 0.02056093141436577, + "debugging/raw_token_scores_std": 0.10981010645627975, + "epoch": 0.47, + "objective/train/advantage_avg": 0.47942253947257996, + "objective/train/docs_used": 741674, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0563838481903076, + "objective/train/original_loss": 3.0563840866088867, + "objective/train/theoretical_loss": 3.5586958985729016, + "objective/train/tokens_used": 1331180000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24191518127918243, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049171805381775, + "objective/train/weighted_lm_loss": 3.2049503326416016, + "objective/train/weights_max": 1.0512198209762573, + "objective/train/weights_min": 0.9514583349227905, + "theoretical_loss": 3.5586958985729016, + "tokens_seen": 1310720000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002686341278849792, + "loss": 3.2782, + "theoretical_loss": 3.5586958985729016, + "tokens_seen": 1310720000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002684449489216799, + "loss": 3.2573, + "theoretical_loss": 3.558438475386766, + "tokens_seen": 1311768576 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026825576995838065, + "loss": 3.2587, + "theoretical_loss": 3.5581813154566038, + "tokens_seen": 1312817152 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026806659099508134, + "loss": 3.2928, + "theoretical_loss": 3.5579244183032483, + "tokens_seen": 1313865728 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.4831710159778595, + "objective/train/docs_used": 743814, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.882990837097168, + "objective/train/original_loss": 2.882990837097168, + "objective/train/theoretical_loss": 3.5578923246117578, + "objective/train/tokens_used": 1334456800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23744919896125793, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049523949623108, + "objective/train/weighted_lm_loss": 3.0261266231536865, + "objective/train/weights_max": 1.0512158870697021, + "objective/train/weights_min": 0.9561281800270081, + "theoretical_loss": 3.5578923246117578, + "tokens_seen": 1313996800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002678774120317821, + "loss": 3.1971, + "theoretical_loss": 3.557667783448787, + "tokens_seen": 1314914304 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002676882330684828, + "loss": 3.2104, + "theoretical_loss": 3.5574114104165546, + "tokens_seen": 1315962880 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002674990541051835, + "loss": 3.2483, + "theoretical_loss": 3.557155298731134, + "tokens_seen": 1317011456 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.49083414673805237, + "objective/train/docs_used": 745506, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4596221446990967, + "objective/train/original_loss": 3.459622383117676, + "objective/train/theoretical_loss": 3.5570913115896228, + "objective/train/tokens_used": 1337733600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24352119863033295, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050321340560913, + "objective/train/weighted_lm_loss": 3.6329755783081055, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9624701142311096, + "theoretical_loss": 3.5570913115896228, + "tokens_seen": 1317273600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002673098751418842, + "loss": 3.2454, + "theoretical_loss": 3.5568994479183456, + "tokens_seen": 1318060032 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002671206961785849, + "loss": 3.2333, + "theoretical_loss": 3.55664385750525, + "tokens_seen": 1319108608 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026693151721528567, + "loss": 3.3081, + "theoretical_loss": 3.556388527020138, + "tokens_seen": 1320157184 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.4799773693084717, + "objective/train/docs_used": 747613, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.5142557621002197, + "objective/train/original_loss": 3.514256000518799, + "objective/train/theoretical_loss": 3.5562928450048386, + "objective/train/tokens_used": 1341010400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2389097660779953, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049212098121643, + "objective/train/weighted_lm_loss": 3.686645030975342, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9529973268508911, + "theoretical_loss": 3.5562928450048386, + "tokens_seen": 1320550400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002667423382519864, + "loss": 3.2451, + "theoretical_loss": 3.556133455992528, + "tokens_seen": 1321205760 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002665531592886871, + "loss": 3.2432, + "theoretical_loss": 3.5558786439531653, + "tokens_seen": 1322254336 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026636398032538785, + "loss": 3.2586, + "theoretical_loss": 3.555624090434014, + "tokens_seen": 1323302912 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.4838820695877075, + "objective/train/docs_used": 749700, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.848118305206299, + "objective/train/original_loss": 2.848118305206299, + "objective/train/theoretical_loss": 3.555496910473588, + "objective/train/tokens_used": 1344287200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2406865805387497, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496114492416382, + "objective/train/weighted_lm_loss": 2.9888694286346436, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9514018893241882, + "theoretical_loss": 3.555496910473588, + "tokens_seen": 1323827200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026617480136208855, + "loss": 3.2677, + "theoretical_loss": 3.555369794968252, + "tokens_seen": 1324351488 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026598562239878924, + "loss": 3.2091, + "theoretical_loss": 3.555115757090271, + "tokens_seen": 1325400064 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026579644343549, + "loss": 3.2848, + "theoretical_loss": 3.554861976335671, + "tokens_seen": 1326448640 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.45839741826057434, + "objective/train/docs_used": 752163, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.01212739944458, + "objective/train/original_loss": 3.01212739944458, + "objective/train/theoretical_loss": 3.5547034937286472, + "objective/train/tokens_used": 1347564000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2407991886138916, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.047062635421753, + "objective/train/weighted_lm_loss": 3.158634901046753, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.952105700969696, + "theoretical_loss": 3.5547034937286472, + "tokens_seen": 1327104000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002656072644721907, + "loss": 3.1785, + "theoretical_loss": 3.5546084522412533, + "tokens_seen": 1327497216 + }, + { + "epoch": 0.47, + "learning_rate": 0.00026541808550889143, + "loss": 3.3024, + "theoretical_loss": 3.5543551843450203, + "tokens_seen": 1328545792 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002652289065455921, + "loss": 3.1727, + "theoretical_loss": 3.5541021721861696, + "tokens_seen": 1329594368 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.4735065698623657, + "objective/train/docs_used": 753856, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.942960739135742, + "objective/train/original_loss": 2.942960739135742, + "objective/train/theoretical_loss": 3.5539125806181584, + "objective/train/tokens_used": 1350840800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23893173038959503, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485646724700928, + "objective/train/weighted_lm_loss": 3.0864858627319336, + "objective/train/weights_max": 1.0512158870697021, + "objective/train/weights_min": 0.9517002701759338, + "theoretical_loss": 3.5539125806181584, + "tokens_seen": 1330380800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002650397275822928, + "loss": 3.2689, + "theoretical_loss": 3.5538494153050895, + "tokens_seen": 1330642944 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026485054861899356, + "loss": 3.3248, + "theoretical_loss": 3.5535969132433554, + "tokens_seen": 1331691520 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002646613696556943, + "loss": 3.2776, + "theoretical_loss": 3.5533446655437277, + "tokens_seen": 1332740096 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.47561439871788025, + "objective/train/docs_used": 755628, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0288760662078857, + "objective/train/original_loss": 3.028876304626465, + "objective/train/theoretical_loss": 3.5531241571044148, + "objective/train/tokens_used": 1354117600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23352853953838348, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487481355667114, + "objective/train/weighted_lm_loss": 3.1767513751983643, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9534009695053101, + "theoretical_loss": 3.5531241571044148, + "tokens_seen": 1333657600 + }, + { + "epoch": 0.48, + "learning_rate": 0.000264472190692395, + "loss": 3.2214, + "theoretical_loss": 3.5530926717501448, + "tokens_seen": 1333788672 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026428301172909575, + "loss": 3.2087, + "theoretical_loss": 3.5528409314077205, + "tokens_seen": 1334837248 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026409383276579645, + "loss": 3.2751, + "theoretical_loss": 3.5525894440627415, + "tokens_seen": 1335885824 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.48273754119873047, + "objective/train/docs_used": 757781, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.197702407836914, + "objective/train/original_loss": 3.197701930999756, + "objective/train/theoretical_loss": 3.5523382092626603, + "objective/train/tokens_used": 1357394400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23674197494983673, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494771003723145, + "objective/train/weighted_lm_loss": 3.355456590652466, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9578342437744141, + "theoretical_loss": 3.5523382092626603, + "tokens_seen": 1336934400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002639046538024972, + "loss": 3.2785, + "theoretical_loss": 3.5523382092626603, + "tokens_seen": 1336934400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002637154748391979, + "loss": 3.3046, + "theoretical_loss": 3.552087226556094, + "tokens_seen": 1337982976 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002635262958758986, + "loss": 3.2553, + "theoretical_loss": 3.5518364954928185, + "tokens_seen": 1339031552 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026333711691259933, + "loss": 3.2336, + "theoretical_loss": 3.551586015623767, + "tokens_seen": 1340080128 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.49417945742607117, + "objective/train/docs_used": 759683, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.986185312271118, + "objective/train/original_loss": 2.9861855506896973, + "objective/train/theoretical_loss": 3.5515547232799087, + "objective/train/tokens_used": 1360671200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2461775243282318, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506694316864014, + "objective/train/weighted_lm_loss": 3.137205123901367, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9528197050094604, + "theoretical_loss": 3.5515547232799087, + "tokens_seen": 1340211200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002631479379493, + "loss": 3.2575, + "theoretical_loss": 3.5513357865010233, + "tokens_seen": 1341128704 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026295875898600077, + "loss": 3.1575, + "theoretical_loss": 3.5510858076778202, + "tokens_seen": 1342177280 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026276958002270146, + "loss": 3.285, + "theoretical_loss": 3.5508360787085342, + "tokens_seen": 1343225856 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.4842744767665863, + "objective/train/docs_used": 761574, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7354183197021484, + "objective/train/original_loss": 2.7354183197021484, + "objective/train/theoretical_loss": 3.550773685453774, + "objective/train/tokens_used": 1363948000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2384546399116516, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496395826339722, + "objective/train/weighted_lm_loss": 2.870645046234131, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9521103501319885, + "theoretical_loss": 3.550773685453774, + "tokens_seen": 1343488000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026258040105940216, + "loss": 3.1794, + "theoretical_loss": 3.5505865991486827, + "tokens_seen": 1344274432 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026239122209610296, + "loss": 3.1656, + "theoretical_loss": 3.5503373685549184, + "tokens_seen": 1345323008 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026220204313280365, + "loss": 3.2142, + "theoretical_loss": 3.5500883864850294, + "tokens_seen": 1346371584 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.4764693081378937, + "objective/train/docs_used": 763625, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9616000652313232, + "objective/train/original_loss": 2.9616000652313232, + "objective/train/theoretical_loss": 3.5499950821913204, + "objective/train/tokens_used": 1367224800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23259218037128448, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488290786743164, + "objective/train/weighted_lm_loss": 3.1081156730651855, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9584375023841858, + "theoretical_loss": 3.5499950821913204, + "tokens_seen": 1346764800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002620128641695044, + "loss": 3.3291, + "theoretical_loss": 3.5498396524979308, + "tokens_seen": 1347420160 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002618236852062051, + "loss": 3.237, + "theoretical_loss": 3.5495911661536637, + "tokens_seen": 1348468736 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002616345062429058, + "loss": 3.293, + "theoretical_loss": 3.5493429270133907, + "tokens_seen": 1349517312 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.48155462741851807, + "objective/train/docs_used": 764744, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.73677396774292, + "objective/train/original_loss": 2.736774206161499, + "objective/train/theoretical_loss": 3.549218900007921, + "objective/train/tokens_used": 1370501600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.235448956489563, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493521690368652, + "objective/train/weighted_lm_loss": 2.8729958534240723, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.953024685382843, + "theoretical_loss": 3.549218900007921, + "tokens_seen": 1350041600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026144532727960653, + "loss": 3.2426, + "theoretical_loss": 3.549094934639392, + "tokens_seen": 1350565888 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026125614831630723, + "loss": 3.1944, + "theoretical_loss": 3.5488471885950625, + "tokens_seen": 1351614464 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002610669693530079, + "loss": 3.2782, + "theoretical_loss": 3.5485996884449076, + "tokens_seen": 1352663040 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.4894540011882782, + "objective/train/docs_used": 766515, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1192467212677, + "objective/train/original_loss": 3.1192469596862793, + "objective/train/theoretical_loss": 3.5484451255261353, + "objective/train/tokens_used": 1373778400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24316319823265076, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501813888549805, + "objective/train/weighted_lm_loss": 3.2756810188293457, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.9519091248512268, + "theoretical_loss": 3.5484451255261353, + "tokens_seen": 1353318400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026087779038970867, + "loss": 3.2537, + "theoretical_loss": 3.5483524337545385, + "tokens_seen": 1353711616 + }, + { + "epoch": 0.48, + "learning_rate": 0.00026068861142640936, + "loss": 3.1763, + "theoretical_loss": 3.5481054240906698, + "tokens_seen": 1354760192 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002604994324631101, + "loss": 3.2387, + "theoretical_loss": 3.547858659021117, + "tokens_seen": 1355808768 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.4911956787109375, + "objective/train/docs_used": 768508, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8986051082611084, + "objective/train/original_loss": 2.8986048698425293, + "objective/train/theoretical_loss": 3.5476737454746035, + "objective/train/tokens_used": 1377055200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24308650195598602, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050355315208435, + "objective/train/weighted_lm_loss": 3.0448904037475586, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9534386992454529, + "theoretical_loss": 3.5476737454746035, + "tokens_seen": 1356595200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002603102534998108, + "loss": 3.2887, + "theoretical_loss": 3.5476121381147894, + "tokens_seen": 1356857344 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026012107453651155, + "loss": 3.2406, + "theoretical_loss": 3.5473658609416896, + "tokens_seen": 1357905920 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002599318955732123, + "loss": 3.2552, + "theoretical_loss": 3.5471198270729083, + "tokens_seen": 1358954496 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.4947124123573303, + "objective/train/docs_used": 770636, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0518202781677246, + "objective/train/original_loss": 3.0518202781677246, + "objective/train/theoretical_loss": 3.5469047466869448, + "objective/train/tokens_used": 1380332000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24551640450954437, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0507193803787231, + "objective/train/weighted_lm_loss": 3.206749200820923, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9574251174926758, + "theoretical_loss": 3.5469047466869448, + "tokens_seen": 1359872000 + }, + { + "epoch": 0.49, + "learning_rate": 0.000259742716609913, + "loss": 3.287, + "theoretical_loss": 3.546874036080621, + "tokens_seen": 1360003072 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025955353764661374, + "loss": 3.2775, + "theoretical_loss": 3.5466284875380856, + "tokens_seen": 1361051648 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025936435868331443, + "loss": 3.337, + "theoretical_loss": 3.546383181019637, + "tokens_seen": 1362100224 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.47492796182632446, + "objective/train/docs_used": 772517, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1139206886291504, + "objective/train/original_loss": 3.1139206886291504, + "objective/train/theoretical_loss": 3.5461381161006846, + "objective/train/tokens_used": 1383608800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23219169676303864, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486730337142944, + "objective/train/weighted_lm_loss": 3.2662875652313232, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9535848498344421, + "theoretical_loss": 3.5461381161006846, + "tokens_seen": 1363148800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002591751797200151, + "loss": 3.2782, + "theoretical_loss": 3.5461381161006846, + "tokens_seen": 1363148800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002589860007567159, + "loss": 3.2394, + "theoretical_loss": 3.5458932923577082, + "tokens_seen": 1364197376 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025879682179341657, + "loss": 3.2557, + "theoretical_loss": 3.5456487093682547, + "tokens_seen": 1365245952 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025860764283011726, + "loss": 3.2417, + "theoretical_loss": 3.5454043667109367, + "tokens_seen": 1366294528 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.4928061366081238, + "objective/train/docs_used": 774661, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.42317533493042, + "objective/train/original_loss": 3.423175096511841, + "objective/train/theoretical_loss": 3.545373840756179, + "objective/train/tokens_used": 1386885600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24523727595806122, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505273342132568, + "objective/train/weighted_lm_loss": 3.59568452835083, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.968864381313324, + "theoretical_loss": 3.545373840756179, + "tokens_seen": 1366425600 + }, + { + "epoch": 0.49, + "learning_rate": 0.000258418463866818, + "loss": 3.3035, + "theoretical_loss": 3.545160263965424, + "tokens_seen": 1367343104 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002582292849035187, + "loss": 3.27, + "theoretical_loss": 3.544916400712445, + "tokens_seen": 1368391680 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025804010594021945, + "loss": 3.2883, + "theoretical_loss": 3.5446727765337815, + "tokens_seen": 1369440256 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.48920339345932007, + "objective/train/docs_used": 776810, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.031377077102661, + "objective/train/original_loss": 3.0313773155212402, + "objective/train/theoretical_loss": 3.5446119077955673, + "objective/train/tokens_used": 1390162400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24134649336338043, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050147294998169, + "objective/train/weighted_lm_loss": 3.183500051498413, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9817801117897034, + "theoretical_loss": 3.5446119077955673, + "tokens_seen": 1369702400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002578509269769202, + "loss": 3.2427, + "theoretical_loss": 3.5444293910122644, + "tokens_seen": 1370488832 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002576617480136209, + "loss": 3.2541, + "theoretical_loss": 3.544186243731771, + "tokens_seen": 1371537408 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025747256905032164, + "loss": 3.2502, + "theoretical_loss": 3.5439433342772224, + "tokens_seen": 1372585984 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.47569069266319275, + "objective/train/docs_used": 778576, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1365725994110107, + "objective/train/original_loss": 3.1365721225738525, + "objective/train/theoretical_loss": 3.543852304461728, + "objective/train/tokens_used": 1393439200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.233524888753891, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487558841705322, + "objective/train/weighted_lm_loss": 3.2887704372406006, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9515125155448914, + "theoretical_loss": 3.543852304461728, + "tokens_seen": 1372979200 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025728339008702233, + "loss": 3.2909, + "theoretical_loss": 3.5437006622345777, + "tokens_seen": 1373634560 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002570942111237231, + "loss": 3.3048, + "theoretical_loss": 3.5434582271908344, + "tokens_seen": 1374683136 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002569050321604238, + "loss": 3.2858, + "theoretical_loss": 3.5432160287340206, + "tokens_seen": 1375731712 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.47667446732521057, + "objective/train/docs_used": 780651, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3615801334381104, + "objective/train/original_loss": 3.3615803718566895, + "objective/train/theoretical_loss": 3.5430950180972527, + "objective/train/tokens_used": 1396716000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2355356216430664, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488646030426025, + "objective/train/weighted_lm_loss": 3.5261738300323486, + "objective/train/weights_max": 1.0512200593948364, + "objective/train/weights_min": 0.9540208578109741, + "theoretical_loss": 3.5430950180972527, + "tokens_seen": 1376256000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025671585319712447, + "loss": 3.2503, + "theoretical_loss": 3.542974066453195, + "tokens_seen": 1376780288 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002565266742338252, + "loss": 3.2741, + "theoretical_loss": 3.5427323399384427, + "tokens_seen": 1377828864 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002563374952705259, + "loss": 3.3355, + "theoretical_loss": 3.5424908487808704, + "tokens_seen": 1378877440 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.490723192691803, + "objective/train/docs_used": 781840, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0954983234405518, + "objective/train/original_loss": 3.0954983234405518, + "objective/train/theoretical_loss": 3.5423400361434307, + "objective/train/tokens_used": 1399992800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24198147654533386, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503023862838745, + "objective/train/weighted_lm_loss": 3.2512621879577637, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9551610946655273, + "theoretical_loss": 3.5423400361434307, + "tokens_seen": 1379532800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002561483163072266, + "loss": 3.2322, + "theoretical_loss": 3.542249592572605, + "tokens_seen": 1379926016 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025595913734392735, + "loss": 3.2141, + "theoretical_loss": 3.5420085709067894, + "tokens_seen": 1380974592 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025576995838062804, + "loss": 3.21, + "theoretical_loss": 3.541767783377579, + "tokens_seen": 1382023168 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.46955326199531555, + "objective/train/docs_used": 784187, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0273990631103516, + "objective/train/original_loss": 3.0273988246917725, + "objective/train/theoretical_loss": 3.541587346139247, + "objective/train/tokens_used": 1403269600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23108002543449402, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481295585632324, + "objective/train/weighted_lm_loss": 3.174248218536377, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9517195820808411, + "theoretical_loss": 3.541587346139247, + "tokens_seen": 1382809600 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025558077941732884, + "loss": 3.1799, + "theoretical_loss": 3.5415272295801388, + "tokens_seen": 1383071744 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025539160045402954, + "loss": 3.2792, + "theoretical_loss": 3.5412869091106405, + "tokens_seen": 1384120320 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025520242149073023, + "loss": 3.2038, + "theoretical_loss": 3.541046821566258, + "tokens_seen": 1385168896 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.47911426424980164, + "objective/train/docs_used": 786072, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8968513011932373, + "objective/train/original_loss": 2.8968517780303955, + "objective/train/theoretical_loss": 3.540836935720394, + "objective/train/tokens_used": 1406546400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23572920262813568, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049109697341919, + "objective/train/weighted_lm_loss": 3.039515972137451, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9524866938591003, + "theoretical_loss": 3.540836935720394, + "tokens_seen": 1386086400 + }, + { + "epoch": 0.5, + "learning_rate": 0.000255013242527431, + "loss": 3.2616, + "theoretical_loss": 3.5408069665451656, + "tokens_seen": 1386217472 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025482406356413167, + "loss": 3.1624, + "theoretical_loss": 3.5405673436465332, + "tokens_seen": 1387266048 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002546348846008324, + "loss": 3.2074, + "theoretical_loss": 3.540327952470525, + "tokens_seen": 1388314624 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.48018690943717957, + "objective/train/docs_used": 787611, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.299957036972046, + "objective/train/original_loss": 3.299956798553467, + "objective/train/theoretical_loss": 3.5400887926182953, + "objective/train/tokens_used": 1409823200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2368466556072235, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049222469329834, + "objective/train/weighted_lm_loss": 3.462350845336914, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9544512629508972, + "theoretical_loss": 3.5400887926182953, + "tokens_seen": 1389363200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002544457056375331, + "loss": 3.2096, + "theoretical_loss": 3.5400887926182953, + "tokens_seen": 1389363200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002542565266742338, + "loss": 3.2465, + "theoretical_loss": 3.5398498636919835, + "tokens_seen": 1390411776 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025406734771093455, + "loss": 3.2359, + "theoretical_loss": 3.5396111652947146, + "tokens_seen": 1391460352 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025387816874763525, + "loss": 3.1666, + "theoretical_loss": 3.539372697030594, + "tokens_seen": 1392508928 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.4910789430141449, + "objective/train/docs_used": 789336, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.141433000564575, + "objective/train/original_loss": 3.1414332389831543, + "objective/train/theoretical_loss": 3.5393429046591365, + "objective/train/tokens_used": 1413100000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2442692369222641, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503495931625366, + "objective/train/weighted_lm_loss": 3.2997946739196777, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9526776075363159, + "theoretical_loss": 3.5393429046591365, + "tokens_seen": 1392640000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025368898978433594, + "loss": 3.3043, + "theoretical_loss": 3.5391344585047024, + "tokens_seen": 1393557504 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002534998108210367, + "loss": 3.2916, + "theoretical_loss": 3.538896449323098, + "tokens_seen": 1394606080 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025331063185773744, + "loss": 3.2459, + "theoretical_loss": 3.538658669092807, + "tokens_seen": 1395654656 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.46647822856903076, + "objective/train/docs_used": 791369, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.493539810180664, + "objective/train/original_loss": 3.493539810180664, + "objective/train/theoretical_loss": 3.5385992597629174, + "objective/train/tokens_used": 1416376800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2277674525976181, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0478054285049438, + "objective/train/weighted_lm_loss": 3.659994602203369, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9575470685958862, + "theoretical_loss": 3.5385992597629174, + "tokens_seen": 1395916800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002531214528944382, + "loss": 3.2459, + "theoretical_loss": 3.5384211174218247, + "tokens_seen": 1396703232 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002529322739311389, + "loss": 3.2727, + "theoretical_loss": 3.538183793919112, + "tokens_seen": 1397751808 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025274309496783957, + "loss": 3.2219, + "theoretical_loss": 3.53794669819459, + "tokens_seen": 1398800384 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.4890250563621521, + "objective/train/docs_used": 793176, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.05161190032959, + "objective/train/original_loss": 3.05161190032959, + "objective/train/theoretical_loss": 3.5378578459425087, + "objective/train/tokens_used": 1419653600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24090653657913208, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501271486282349, + "objective/train/weighted_lm_loss": 3.204907178878784, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9735680818557739, + "theoretical_loss": 3.5378578459425087, + "tokens_seen": 1399193600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002525539160045403, + "loss": 3.2726, + "theoretical_loss": 3.53770982985914, + "tokens_seen": 1399848960 + }, + { + "epoch": 0.5, + "learning_rate": 0.000252364737041241, + "loss": 3.2014, + "theoretical_loss": 3.537473188524598, + "tokens_seen": 1400897536 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025217555807794176, + "loss": 3.2634, + "theoretical_loss": 3.5372367738037527, + "tokens_seen": 1401946112 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.4814291298389435, + "objective/train/docs_used": 795165, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7210984230041504, + "objective/train/original_loss": 2.7210984230041504, + "objective/train/theoretical_loss": 3.537118651302722, + "objective/train/tokens_used": 1422930400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2377876341342926, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493515729904175, + "objective/train/weighted_lm_loss": 2.8548381328582764, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9514068365097046, + "theoretical_loss": 3.537118651302722, + "tokens_seen": 1402470400 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025198637911464245, + "loss": 3.2325, + "theoretical_loss": 3.5370005853103414, + "tokens_seen": 1402994688 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025179720015134315, + "loss": 3.2092, + "theoretical_loss": 3.5367646226590503, + "tokens_seen": 1404043264 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002516080211880439, + "loss": 3.1997, + "theoretical_loss": 3.536528885465506, + "tokens_seen": 1405091840 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.48178187012672424, + "objective/train/docs_used": 797297, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.338026285171509, + "objective/train/original_loss": 3.3380260467529297, + "objective/train/theoretical_loss": 3.536381664039392, + "objective/train/tokens_used": 1426207200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24058951437473297, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049401044845581, + "objective/train/weighted_lm_loss": 3.503382682800293, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9520537853240967, + "theoretical_loss": 3.536381664039392, + "tokens_seen": 1405747200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002514188422247446, + "loss": 3.2556, + "theoretical_loss": 3.536293373346278, + "tokens_seen": 1406140416 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002512296632614453, + "loss": 3.1468, + "theoretical_loss": 3.5360580859188713, + "tokens_seen": 1407188992 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002510404842981461, + "loss": 3.1976, + "theoretical_loss": 3.5358230228017264, + "tokens_seen": 1408237568 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.4824129343032837, + "objective/train/docs_used": 799421, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9747111797332764, + "objective/train/original_loss": 2.9747114181518555, + "objective/train/theoretical_loss": 3.5356468724384706, + "objective/train/tokens_used": 1429484000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23822368681430817, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494521856307983, + "objective/train/weighted_lm_loss": 3.1242523193359375, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9635123014450073, + "theoretical_loss": 3.5356468724384706, + "tokens_seen": 1409024000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002508513053348468, + "loss": 3.1687, + "theoretical_loss": 3.535588183614215, + "tokens_seen": 1409286144 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002506621263715475, + "loss": 3.1706, + "theoretical_loss": 3.535353567976637, + "tokens_seen": 1410334720 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002504729474082482, + "loss": 3.1224, + "theoretical_loss": 3.5351191755102187, + "tokens_seen": 1411383296 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.48010411858558655, + "objective/train/docs_used": 800610, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.90187406539917, + "objective/train/original_loss": 2.901874542236328, + "objective/train/theoretical_loss": 3.5349142648751304, + "objective/train/tokens_used": 1432760800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23449444770812988, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492023229599, + "objective/train/weighted_lm_loss": 3.0453240871429443, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9521341919898987, + "theoretical_loss": 3.5349142648751304, + "tokens_seen": 1412300800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002502837684449489, + "loss": 3.2347, + "theoretical_loss": 3.534885005837108, + "tokens_seen": 1412431872 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025009458948164966, + "loss": 3.1638, + "theoretical_loss": 3.5346510585803728, + "tokens_seen": 1413480448 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024990541051835035, + "loss": 3.1473, + "theoretical_loss": 3.534417333363997, + "tokens_seen": 1414529024 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.48848357796669006, + "objective/train/docs_used": 802603, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.191892623901367, + "objective/train/original_loss": 3.191892623901367, + "objective/train/theoretical_loss": 3.5341838298128803, + "objective/train/tokens_used": 1436037600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24162043631076813, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500764846801758, + "objective/train/weighted_lm_loss": 3.351858377456665, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9517599940299988, + "theoretical_loss": 3.5341838298128803, + "tokens_seen": 1415577600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002497162315550511, + "loss": 3.2598, + "theoretical_loss": 3.5341838298128803, + "tokens_seen": 1415577600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002495270525917518, + "loss": 3.2427, + "theoretical_loss": 3.5339505475528314, + "tokens_seen": 1416626176 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024933787362845254, + "loss": 3.1952, + "theoretical_loss": 3.533717486210567, + "tokens_seen": 1417674752 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024914869466515323, + "loss": 3.244, + "theoretical_loss": 3.5334846454137114, + "tokens_seen": 1418723328 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.4878292977809906, + "objective/train/docs_used": 804391, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.043952465057373, + "objective/train/original_loss": 3.043951988220215, + "objective/train/theoretical_loss": 3.533455555802692, + "objective/train/tokens_used": 1439314400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24175363779067993, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500118732452393, + "objective/train/weighted_lm_loss": 3.195713996887207, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9565330743789673, + "theoretical_loss": 3.533455555802692, + "tokens_seen": 1418854400 + }, + { + "epoch": 0.51, + "learning_rate": 0.000248959515701854, + "loss": 3.2038, + "theoretical_loss": 3.533252024790788, + "tokens_seen": 1419771904 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002487703367385547, + "loss": 3.2483, + "theoretical_loss": 3.5330196239712217, + "tokens_seen": 1420820480 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024858115777525537, + "loss": 3.1338, + "theoretical_loss": 3.532787442585333, + "tokens_seen": 1421869056 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.4747827351093292, + "objective/train/docs_used": 806541, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.976189136505127, + "objective/train/original_loss": 2.976189136505127, + "objective/train/theoretical_loss": 3.5327294314821365, + "objective/train/tokens_used": 1442591200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2333299070596695, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486640930175781, + "objective/train/weighted_lm_loss": 3.120753049850464, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9516189098358154, + "theoretical_loss": 3.5327294314821365, + "tokens_seen": 1422131200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002483919788119561, + "loss": 3.2056, + "theoretical_loss": 3.5325554802643375, + "tokens_seen": 1422917632 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024820279984865686, + "loss": 3.2624, + "theoretical_loss": 3.5323237366403397, + "tokens_seen": 1423966208 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024801362088535756, + "loss": 3.2228, + "theoretical_loss": 3.532092211346335, + "tokens_seen": 1425014784 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.491454541683197, + "objective/train/docs_used": 808636, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2102763652801514, + "objective/train/original_loss": 3.2102766036987305, + "objective/train/theoretical_loss": 3.5320054455745304, + "objective/train/tokens_used": 1445868000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24370373785495758, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503844022750854, + "objective/train/weighted_lm_loss": 3.3718581199645996, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9566341042518616, + "theoretical_loss": 3.5320054455745304, + "tokens_seen": 1425408000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024782444192205825, + "loss": 3.1844, + "theoretical_loss": 3.5318609040162015, + "tokens_seen": 1426063360 + }, + { + "epoch": 0.51, + "learning_rate": 0.000247635262958759, + "loss": 3.1546, + "theoretical_loss": 3.5316298142847016, + "tokens_seen": 1427111936 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002474460839954597, + "loss": 3.2048, + "theoretical_loss": 3.5313989417874763, + "tokens_seen": 1428160512 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.4886825978755951, + "objective/train/docs_used": 810397, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9854471683502197, + "objective/train/original_loss": 2.985447406768799, + "objective/train/theoretical_loss": 3.5312835868880947, + "objective/train/tokens_used": 1449144800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24093887209892273, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500929355621338, + "objective/train/weighted_lm_loss": 3.1349117755889893, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9605134129524231, + "theoretical_loss": 3.5312835868880947, + "tokens_seen": 1428684800 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024725690503216044, + "loss": 3.2367, + "theoretical_loss": 3.5311682861610447, + "tokens_seen": 1429209088 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024706772606886113, + "loss": 3.2155, + "theoretical_loss": 3.5309378470427997, + "tokens_seen": 1430257664 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002468785471055619, + "loss": 3.1929, + "theoretical_loss": 3.530707624071006, + "tokens_seen": 1431306240 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.4831509590148926, + "objective/train/docs_used": 812373, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3620550632476807, + "objective/train/original_loss": 3.3620550632476807, + "objective/train/theoretical_loss": 3.530563844315122, + "objective/train/tokens_used": 1452421600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23847836256027222, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495271682739258, + "objective/train/weighted_lm_loss": 3.5282113552093506, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9517561793327332, + "theoretical_loss": 3.530563844315122, + "tokens_seen": 1431961600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002466893681422626, + "loss": 3.2089, + "theoretical_loss": 3.5304776168847964, + "tokens_seen": 1432354816 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002465001891789633, + "loss": 3.1689, + "theoretical_loss": 3.530247825124171, + "tokens_seen": 1433403392 + }, + { + "epoch": 0.51, + "learning_rate": 0.000246311010215664, + "loss": 3.1184, + "theoretical_loss": 3.530018248429992, + "tokens_seen": 1434451968 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.49197354912757874, + "objective/train/docs_used": 813976, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9489939212799072, + "objective/train/original_loss": 2.9489941596984863, + "objective/train/theoretical_loss": 3.5298462068311554, + "objective/train/tokens_used": 1455698400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24280238151550293, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504316091537476, + "objective/train/weighted_lm_loss": 3.0980122089385986, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9728739857673645, + "theoretical_loss": 3.5298462068311554, + "tokens_seen": 1435238400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002461218312523647, + "loss": 3.1976, + "theoretical_loss": 3.529788886443983, + "tokens_seen": 1435500544 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024593265228906546, + "loss": 3.0743, + "theoretical_loss": 3.529559738808726, + "tokens_seen": 1436549120 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002457434733257662, + "loss": 3.1564, + "theoretical_loss": 3.5293308051676573, + "tokens_seen": 1437597696 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.4857310950756073, + "objective/train/docs_used": 815908, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9717392921447754, + "objective/train/original_loss": 2.9717395305633545, + "objective/train/theoretical_loss": 3.5291306634941737, + "objective/train/tokens_used": 1458975200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24036578834056854, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497949123382568, + "objective/train/weighted_lm_loss": 3.1189889907836914, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9525620937347412, + "theoretical_loss": 3.5291306634941737, + "tokens_seen": 1438515200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002455542943624669, + "loss": 3.1752, + "theoretical_loss": 3.529102085165067, + "tokens_seen": 1438646272 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002453651153991676, + "loss": 3.1295, + "theoretical_loss": 3.5288735784460936, + "tokens_seen": 1439694848 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024517593643586834, + "loss": 3.1535, + "theoretical_loss": 3.5286452846567244, + "tokens_seen": 1440743424 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.4737866222858429, + "objective/train/docs_used": 817781, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.049960136413574, + "objective/train/original_loss": 3.049960136413574, + "objective/train/theoretical_loss": 3.528417203443791, + "objective/train/tokens_used": 1462252000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23372094333171844, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485665798187256, + "objective/train/weighted_lm_loss": 3.1971216201782227, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9524149298667908, + "theoretical_loss": 3.528417203443791, + "tokens_seen": 1441792000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024498675747256903, + "loss": 3.1712, + "theoretical_loss": 3.528417203443791, + "tokens_seen": 1441792000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002447975785092698, + "loss": 3.1211, + "theoretical_loss": 3.528189334454967, + "tokens_seen": 1442840576 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024460839954597053, + "loss": 3.1512, + "theoretical_loss": 3.527961677338765, + "tokens_seen": 1443889152 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002444192205826712, + "loss": 3.1175, + "theoretical_loss": 3.5277342317445353, + "tokens_seen": 1444937728 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.4741077423095703, + "objective/train/docs_used": 819121, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9588849544525146, + "objective/train/original_loss": 2.9588847160339355, + "objective/train/theoretical_loss": 3.5277058159004637, + "objective/train/tokens_used": 1465528800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24014198780059814, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048631191253662, + "objective/train/weighted_lm_loss": 3.10211443901062, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9528647065162659, + "theoretical_loss": 3.5277058159004637, + "tokens_seen": 1445068800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002442300416193719, + "loss": 3.1457, + "theoretical_loss": 3.5275069973224618, + "tokens_seen": 1445986304 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024404086265607263, + "loss": 3.1661, + "theoretical_loss": 3.527279973723561, + "tokens_seen": 1447034880 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024385168369277336, + "loss": 3.2027, + "theoretical_loss": 3.5270531605996767, + "tokens_seen": 1448083456 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.47648268938064575, + "objective/train/docs_used": 821111, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1611850261688232, + "objective/train/original_loss": 3.1611852645874023, + "objective/train/theoretical_loss": 3.5269964901647066, + "objective/train/tokens_used": 1468805600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23558388650417328, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048845648765564, + "objective/train/weighted_lm_loss": 3.3149309158325195, + "objective/train/weights_max": 1.0512150526046753, + "objective/train/weights_min": 0.9515277147293091, + "theoretical_loss": 3.5269964901647066, + "tokens_seen": 1448345600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002436625047294741, + "loss": 3.1585, + "theoretical_loss": 3.5268265576034805, + "tokens_seen": 1449132032 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024347332576617482, + "loss": 3.1574, + "theoretical_loss": 3.5266001643884684, + "tokens_seen": 1450180608 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024328414680287552, + "loss": 3.1439, + "theoretical_loss": 3.526373980608957, + "tokens_seen": 1451229184 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.4845767915248871, + "objective/train/docs_used": 823345, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.209475517272949, + "objective/train/original_loss": 3.209475517272949, + "objective/train/theoretical_loss": 3.526289215616317, + "objective/train/tokens_used": 1472082400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24037227034568787, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496795177459717, + "objective/train/weighted_lm_loss": 3.3680312633514404, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9529867768287659, + "theoretical_loss": 3.526289215616317, + "tokens_seen": 1451622400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024309496783957624, + "loss": 3.1728, + "theoretical_loss": 3.5261480059200814, + "tokens_seen": 1452277760 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024290578887627696, + "loss": 3.156, + "theoretical_loss": 3.5259222399777945, + "tokens_seen": 1453326336 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024271660991297768, + "loss": 3.1253, + "theoretical_loss": 3.525696682438861, + "tokens_seen": 1454374912 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.47921082377433777, + "objective/train/docs_used": 825270, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7915778160095215, + "objective/train/original_loss": 2.7915778160095215, + "objective/train/theoretical_loss": 3.525583981713613, + "objective/train/tokens_used": 1475359200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23409366607666016, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491108894348145, + "objective/train/weighted_lm_loss": 2.9294707775115967, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9529442191123962, + "theoretical_loss": 3.525583981713613, + "tokens_seen": 1454899200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002425274309496784, + "loss": 3.145, + "theoretical_loss": 3.5254713329608585, + "tokens_seen": 1455423488 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024233825198637912, + "loss": 3.1236, + "theoretical_loss": 3.5252461912021733, + "tokens_seen": 1456472064 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024214907302307984, + "loss": 3.12, + "theoretical_loss": 3.525021256821997, + "tokens_seen": 1457520640 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.4787052273750305, + "objective/train/docs_used": 827227, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9800362586975098, + "objective/train/original_loss": 2.9800362586975098, + "objective/train/theoretical_loss": 3.524880777992677, + "objective/train/tokens_used": 1478636000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23406291007995605, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490602254867554, + "objective/train/weighted_lm_loss": 3.127027750015259, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9527052044868469, + "theoretical_loss": 3.524880777992677, + "tokens_seen": 1458176000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024195989405978056, + "loss": 3.15, + "theoretical_loss": 3.5247965294803265, + "tokens_seen": 1458569216 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024177071509648128, + "loss": 3.1198, + "theoretical_loss": 3.524572008837958, + "tokens_seen": 1459617792 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024158153613318197, + "loss": 3.208, + "theoretical_loss": 3.5243476945564893, + "tokens_seen": 1460666368 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.4924372732639313, + "objective/train/docs_used": 829193, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1463100910186768, + "objective/train/original_loss": 3.1463098526000977, + "objective/train/theoretical_loss": 3.524179594066606, + "objective/train/tokens_used": 1481912800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24343392252922058, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504813194274902, + "objective/train/weighted_lm_loss": 3.304593086242676, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9679165482521057, + "theoretical_loss": 3.524179594066606, + "tokens_seen": 1461452800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024139235716988272, + "loss": 3.0859, + "theoretical_loss": 3.524123586298312, + "tokens_seen": 1461714944 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024120317820658344, + "loss": 3.1743, + "theoretical_loss": 3.5238996837266137, + "tokens_seen": 1462763520 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024101399924328416, + "loss": 3.1895, + "theoretical_loss": 3.5236759865053724, + "tokens_seen": 1463812096 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.45739272236824036, + "objective/train/docs_used": 831131, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.861849546432495, + "objective/train/original_loss": 2.861849784851074, + "objective/train/theoretical_loss": 3.5234804196247764, + "objective/train/tokens_used": 1485189600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23027275502681732, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.046909213066101, + "objective/train/weighted_lm_loss": 3.000051736831665, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9622508883476257, + "theoretical_loss": 3.5234804196247764, + "tokens_seen": 1464729600 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024082482027998486, + "loss": 3.1312, + "theoretical_loss": 3.523452494299356, + "tokens_seen": 1464860672 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024063564131668558, + "loss": 3.2132, + "theoretical_loss": 3.5232292067741176, + "tokens_seen": 1465909248 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002404464623533863, + "loss": 3.165, + "theoretical_loss": 3.523006123595997, + "tokens_seen": 1466957824 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.4719890356063843, + "objective/train/docs_used": 832511, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2662267684936523, + "objective/train/original_loss": 3.2662272453308105, + "objective/train/theoretical_loss": 3.522783244432115, + "objective/train/tokens_used": 1488466400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2407318353652954, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04842209815979, + "objective/train/weighted_lm_loss": 3.422483205795288, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9514791965484619, + "theoretical_loss": 3.522783244432115, + "tokens_seen": 1468006400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024025728339008705, + "loss": 3.1954, + "theoretical_loss": 3.522783244432115, + "tokens_seen": 1468006400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024006810442678777, + "loss": 3.2216, + "theoretical_loss": 3.52256056895037, + "tokens_seen": 1469054976 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023987892546348846, + "loss": 3.1763, + "theoretical_loss": 3.5223380968194404, + "tokens_seen": 1470103552 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023968974650018918, + "loss": 3.1598, + "theoretical_loss": 3.522115827708778, + "tokens_seen": 1471152128 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.492140531539917, + "objective/train/docs_used": 834109, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.772519588470459, + "objective/train/original_loss": 2.772519588470459, + "objective/train/theoretical_loss": 3.522088058328375, + "objective/train/tokens_used": 1491743200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2431326061487198, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050450086593628, + "objective/train/weighted_lm_loss": 2.913102626800537, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9724238514900208, + "theoretical_loss": 3.522088058328375, + "tokens_seen": 1471283200 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002395005675368899, + "loss": 3.1937, + "theoretical_loss": 3.5218937612886068, + "tokens_seen": 1472200704 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023931138857359062, + "loss": 3.1715, + "theoretical_loss": 3.521671897229922, + "tokens_seen": 1473249280 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023912220961029134, + "loss": 3.1673, + "theoretical_loss": 3.521450235204485, + "tokens_seen": 1474297856 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.4695150852203369, + "objective/train/docs_used": 835915, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.096074342727661, + "objective/train/original_loss": 3.096074104309082, + "objective/train/theoretical_loss": 3.521394851227428, + "objective/train/tokens_used": 1495020000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23337149620056152, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481371879577637, + "objective/train/weighted_lm_loss": 3.2463152408599854, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9514370560646057, + "theoretical_loss": 3.521394851227428, + "tokens_seen": 1474560000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023893303064699206, + "loss": 3.2446, + "theoretical_loss": 3.521228774884823, + "tokens_seen": 1475346432 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023874385168369278, + "loss": 3.2234, + "theoretical_loss": 3.521007515944228, + "tokens_seen": 1476395008 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002385546727203935, + "loss": 3.1922, + "theoretical_loss": 3.52078645805675, + "tokens_seen": 1477443584 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.49413806200027466, + "objective/train/docs_used": 837471, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.045189380645752, + "objective/train/original_loss": 3.04518985748291, + "objective/train/theoretical_loss": 3.5207036131165568, + "objective/train/tokens_used": 1498296800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24563145637512207, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506623983383179, + "objective/train/weighted_lm_loss": 3.1994035243988037, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9573822617530823, + "theoretical_loss": 3.5207036131165568, + "tokens_seen": 1477836800 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002383654937570942, + "loss": 3.1572, + "theoretical_loss": 3.5205656008972, + "tokens_seen": 1478492160 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023817631479379492, + "loss": 3.1917, + "theoretical_loss": 3.5203449441411423, + "tokens_seen": 1479540736 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023798713583049567, + "loss": 3.1977, + "theoretical_loss": 3.5201244874648983, + "tokens_seen": 1480589312 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.4763402044773102, + "objective/train/docs_used": 839384, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.983619213104248, + "objective/train/original_loss": 2.983618974685669, + "objective/train/theoretical_loss": 3.5200143340557615, + "objective/train/tokens_used": 1501573600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23397092521190643, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488231182098389, + "objective/train/weighted_lm_loss": 3.1282737255096436, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9571855068206787, + "theoretical_loss": 3.5200143340557615, + "tokens_seen": 1481113600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023779795686719639, + "loss": 3.2497, + "theoretical_loss": 3.519904230545538, + "tokens_seen": 1481637888 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002376087779038971, + "loss": 3.1995, + "theoretical_loss": 3.5196841730608828, + "tokens_seen": 1482686464 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002374195989405978, + "loss": 3.2798, + "theoretical_loss": 3.5194643146895, + "tokens_seen": 1483735040 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.48236167430877686, + "objective/train/docs_used": 841281, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0101518630981445, + "objective/train/original_loss": 3.0101518630981445, + "objective/train/theoretical_loss": 3.5193270041770703, + "objective/train/tokens_used": 1504850400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23790954053401947, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494455099105835, + "objective/train/weighted_lm_loss": 3.15853214263916, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9519292712211609, + "theoretical_loss": 3.5193270041770703, + "tokens_seen": 1484390400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023723041997729852, + "loss": 3.2623, + "theoretical_loss": 3.5192446551107017, + "tokens_seen": 1484783616 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023704124101399924, + "loss": 3.2571, + "theoretical_loss": 3.519025194004543, + "tokens_seen": 1485832192 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002368520620507, + "loss": 3.1932, + "theoretical_loss": 3.518805931051819, + "tokens_seen": 1486880768 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.47434303164482117, + "objective/train/docs_used": 843256, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2128210067749023, + "objective/train/original_loss": 3.2128210067749023, + "objective/train/theoretical_loss": 3.518641613683862, + "objective/train/tokens_used": 1508127200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2341526299715042, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486242771148682, + "objective/train/weighted_lm_loss": 3.368643045425415, + "objective/train/weights_max": 1.0512194633483887, + "objective/train/weights_min": 0.9513967037200928, + "theoretical_loss": 3.518641613683862, + "tokens_seen": 1487667200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023666288308740068, + "loss": 3.2328, + "theoretical_loss": 3.5185868659340627, + "tokens_seen": 1487929344 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002364737041241014, + "loss": 3.2408, + "theoretical_loss": 3.518367998333543, + "tokens_seen": 1488977920 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023628452516080212, + "loss": 3.1762, + "theoretical_loss": 3.518149327933262, + "tokens_seen": 1490026496 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.4800264835357666, + "objective/train/docs_used": 844947, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1356568336486816, + "objective/train/original_loss": 3.1356565952301025, + "objective/train/theoretical_loss": 3.517958152850192, + "objective/train/tokens_used": 1511404000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2361105978488922, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049202799797058, + "objective/train/weighted_lm_loss": 3.289992332458496, + "objective/train/weights_max": 1.0512189865112305, + "objective/train/weights_min": 0.9541196823120117, + "theoretical_loss": 3.517958152850192, + "tokens_seen": 1490944000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023609534619750284, + "loss": 3.321, + "theoretical_loss": 3.5179308544169543, + "tokens_seen": 1491075072 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023590616723420354, + "loss": 3.2112, + "theoretical_loss": 3.5177125774690827, + "tokens_seen": 1492123648 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023571698827090428, + "loss": 3.2648, + "theoretical_loss": 3.517494496774837, + "tokens_seen": 1493172224 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.4808512330055237, + "objective/train/docs_used": 846677, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1397337913513184, + "objective/train/original_loss": 3.1397337913513184, + "objective/train/theoretical_loss": 3.517276612020132, + "objective/train/tokens_used": 1514680800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23949705064296722, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049302339553833, + "objective/train/weighted_lm_loss": 3.2939443588256836, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9525419473648071, + "theoretical_loss": 3.517276612020132, + "tokens_seen": 1494220800 + }, + { + "epoch": 0.53, + "learning_rate": 0.000235527809307605, + "loss": 3.257, + "theoretical_loss": 3.517276612020132, + "tokens_seen": 1494220800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023533863034430573, + "loss": 3.2537, + "theoretical_loss": 3.5170589228916054, + "tokens_seen": 1495269376 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023514945138100645, + "loss": 3.2567, + "theoretical_loss": 3.516841429076615, + "tokens_seen": 1496317952 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023496027241770714, + "loss": 3.206, + "theoretical_loss": 3.516624130263237, + "tokens_seen": 1497366528 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.4818221628665924, + "objective/train/docs_used": 848503, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1971991062164307, + "objective/train/original_loss": 3.1971988677978516, + "objective/train/theoretical_loss": 3.5165969816071083, + "objective/train/tokens_used": 1517957600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24132023751735687, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494087934494019, + "objective/train/weighted_lm_loss": 3.355829954147339, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9549484848976135, + "theoretical_loss": 3.5165969816071083, + "tokens_seen": 1497497600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023477109345440786, + "loss": 3.2779, + "theoretical_loss": 3.5164070261402633, + "tokens_seen": 1498415104 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002345819144911086, + "loss": 3.2636, + "theoretical_loss": 3.516190116397201, + "tokens_seen": 1499463680 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023439273552780933, + "loss": 3.2725, + "theoretical_loss": 3.5159734007242682, + "tokens_seen": 1500512256 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.4783119261264801, + "objective/train/docs_used": 850745, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.984816312789917, + "objective/train/original_loss": 2.984816312789917, + "objective/train/theoretical_loss": 3.5159192520932576, + "objective/train/tokens_used": 1521234400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23693495988845825, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490354299545288, + "objective/train/weighted_lm_loss": 3.1317691802978516, + "objective/train/weights_max": 1.0512199401855469, + "objective/train/weights_min": 0.95163893699646, + "theoretical_loss": 3.5159192520932576, + "tokens_seen": 1500774400 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023420355656451002, + "loss": 3.2283, + "theoretical_loss": 3.5157568788123923, + "tokens_seen": 1501560832 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023401437760121074, + "loss": 3.1501, + "theoretical_loss": 3.515540550353209, + "tokens_seen": 1502609408 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023382519863791146, + "loss": 3.2351, + "theoretical_loss": 3.5153244150390597, + "tokens_seen": 1503657984 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.48390939831733704, + "objective/train/docs_used": 852130, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0634584426879883, + "objective/train/original_loss": 3.0634589195251465, + "objective/train/theoretical_loss": 3.515243414028785, + "objective/train/tokens_used": 1524511200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24005842208862305, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496110916137695, + "objective/train/weighted_lm_loss": 3.215369462966919, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.954659104347229, + "theoretical_loss": 3.515243414028785, + "tokens_seen": 1504051200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023363601967461218, + "loss": 3.1928, + "theoretical_loss": 3.5151084725629884, + "tokens_seen": 1504706560 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023344684071131293, + "loss": 3.1693, + "theoretical_loss": 3.5148927226187405, + "tokens_seen": 1505755136 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023325766174801362, + "loss": 3.2186, + "theoretical_loss": 3.514677164900762, + "tokens_seen": 1506803712 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.47938430309295654, + "objective/train/docs_used": 853500, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.098717451095581, + "objective/train/original_loss": 3.0987181663513184, + "objective/train/theoretical_loss": 3.5145694580313287, + "objective/train/tokens_used": 1527788000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23806166648864746, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049148440361023, + "objective/train/weighted_lm_loss": 3.24904203414917, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9516683220863342, + "theoretical_loss": 3.5145694580313287, + "tokens_seen": 1507328000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023306848278471435, + "loss": 3.2394, + "theoretical_loss": 3.514461799104195, + "tokens_seen": 1507852288 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023287930382141507, + "loss": 3.2555, + "theoretical_loss": 3.5142466249248754, + "tokens_seen": 1508900864 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023269012485811579, + "loss": 3.2389, + "theoretical_loss": 3.5140316420593347, + "tokens_seen": 1509949440 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.48630884289741516, + "objective/train/docs_used": 854981, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7976765632629395, + "objective/train/original_loss": 2.7976765632629395, + "objective/train/theoretical_loss": 3.5138973747853353, + "objective/train/tokens_used": 1531064800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23877815902233124, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04984450340271, + "objective/train/weighted_lm_loss": 2.9376676082611084, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9745818972587585, + "theoretical_loss": 3.5138973747853353, + "tokens_seen": 1510604800 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023250094589481648, + "loss": 3.2002, + "theoretical_loss": 3.513816850204793, + "tokens_seen": 1510998016 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023231176693151723, + "loss": 3.2134, + "theoretical_loss": 3.5136022490591605, + "tokens_seen": 1512046592 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023212258796821795, + "loss": 3.2219, + "theoretical_loss": 3.5133878383210337, + "tokens_seen": 1513095168 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.4808279871940613, + "objective/train/docs_used": 857043, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.932635545730591, + "objective/train/original_loss": 2.93263578414917, + "objective/train/theoretical_loss": 3.513227155041438, + "objective/train/tokens_used": 1534341600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23708273470401764, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492877960205078, + "objective/train/weighted_lm_loss": 3.0757665634155273, + "objective/train/weights_max": 1.0512207746505737, + "objective/train/weights_min": 0.952289342880249, + "theoretical_loss": 3.513227155041438, + "tokens_seen": 1513881600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023193340900491867, + "loss": 3.2437, + "theoretical_loss": 3.513173617689695, + "tokens_seen": 1514143744 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023174423004161936, + "loss": 3.2108, + "theoretical_loss": 3.512959586865108, + "tokens_seen": 1515192320 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023155505107832008, + "loss": 3.2642, + "theoretical_loss": 3.512745745547918, + "tokens_seen": 1516240896 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.4857465922832489, + "objective/train/docs_used": 859161, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.04585599899292, + "objective/train/original_loss": 3.04585599899292, + "objective/train/theoretical_loss": 3.5125587896158477, + "objective/train/tokens_used": 1537618400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2411489188671112, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498003959655762, + "objective/train/weighted_lm_loss": 3.1971046924591064, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9541661739349365, + "theoretical_loss": 3.5125587896158477, + "tokens_seen": 1517158400 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002313658721150208, + "loss": 3.1957, + "theoretical_loss": 3.5125320934394484, + "tokens_seen": 1517289472 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023117669315172155, + "loss": 3.2254, + "theoretical_loss": 3.5123186302417007, + "tokens_seen": 1518338048 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023098751418842227, + "loss": 3.189, + "theoretical_loss": 3.51210535565735, + "tokens_seen": 1519386624 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.4926300644874573, + "objective/train/docs_used": 861286, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.171475887298584, + "objective/train/original_loss": 3.171476125717163, + "objective/train/theoretical_loss": 3.511892269389743, + "objective/train/tokens_used": 1540895200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2442355751991272, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505046844482422, + "objective/train/weighted_lm_loss": 3.3321945667266846, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9602129459381104, + "theoretical_loss": 3.511892269389743, + "tokens_seen": 1520435200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023079833522512296, + "loss": 3.1388, + "theoretical_loss": 3.511892269389743, + "tokens_seen": 1520435200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023060915626182369, + "loss": 3.1666, + "theoretical_loss": 3.5116793711429004, + "tokens_seen": 1521483776 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002304199772985244, + "loss": 3.1639, + "theoretical_loss": 3.511466660621508, + "tokens_seen": 1522532352 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023023079833522513, + "loss": 3.2193, + "theoretical_loss": 3.5112541375309214, + "tokens_seen": 1523580928 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.48999133706092834, + "objective/train/docs_used": 863379, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7391910552978516, + "objective/train/original_loss": 2.7391910552978516, + "objective/train/theoretical_loss": 3.511227585308678, + "objective/train/tokens_used": 1544172000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24297131597995758, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050234317779541, + "objective/train/weighted_lm_loss": 2.8769237995147705, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9612780213356018, + "theoretical_loss": 3.511227585308678, + "tokens_seen": 1523712000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023004161937192585, + "loss": 3.2179, + "theoretical_loss": 3.511041801577159, + "tokens_seen": 1524629504 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022985244040862657, + "loss": 3.2151, + "theoretical_loss": 3.510829652466904, + "tokens_seen": 1525678080 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002296632614453273, + "loss": 3.2204, + "theoretical_loss": 3.5106176899074972, + "tokens_seen": 1526726656 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.4816513955593109, + "objective/train/docs_used": 865312, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0186238288879395, + "objective/train/original_loss": 3.0186238288879395, + "objective/train/theoretical_loss": 3.510564728381983, + "objective/train/tokens_used": 1547448800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23675787448883057, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493686199188232, + "objective/train/weighted_lm_loss": 3.1666901111602783, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9688776731491089, + "theoretical_loss": 3.510564728381983, + "tokens_seen": 1526988800 + }, + { + "epoch": 0.55, + "learning_rate": 0.000229474082482028, + "loss": 3.1873, + "theoretical_loss": 3.510405913606943, + "tokens_seen": 1527775232 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022928490351872873, + "loss": 3.2087, + "theoretical_loss": 3.510194323273899, + "tokens_seen": 1528823808 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022909572455542942, + "loss": 3.153, + "theoretical_loss": 3.5099829186176796, + "tokens_seen": 1529872384 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.494413286447525, + "objective/train/docs_used": 867418, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9637973308563232, + "objective/train/original_loss": 2.9637973308563232, + "objective/train/theoretical_loss": 3.5099036896821874, + "objective/train/tokens_used": 1550725600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24574802815914154, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506906509399414, + "objective/train/weighted_lm_loss": 3.114089250564575, + "objective/train/weights_max": 1.0512158870697021, + "objective/train/weights_min": 0.9844887256622314, + "theoretical_loss": 3.5099036896821874, + "tokens_seen": 1530265600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022890654559213017, + "loss": 3.1477, + "theoretical_loss": 3.509771699348253, + "tokens_seen": 1530920960 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002287173666288309, + "loss": 3.1358, + "theoretical_loss": 3.5095606651762368, + "tokens_seen": 1531969536 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002285281876655316, + "loss": 3.1609, + "theoretical_loss": 3.5093498158128997, + "tokens_seen": 1533018112 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.4862027168273926, + "objective/train/docs_used": 869264, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.031954526901245, + "objective/train/original_loss": 3.031954526901245, + "objective/train/theoretical_loss": 3.5092444603444344, + "objective/train/tokens_used": 1554002400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2403550148010254, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498418807983398, + "objective/train/weighted_lm_loss": 3.1832399368286133, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9602664709091187, + "theoretical_loss": 3.5092444603444344, + "tokens_seen": 1533542400 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002283390087022323, + "loss": 3.1915, + "theoretical_loss": 3.509139150970157, + "tokens_seen": 1534066688 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022814982973893303, + "loss": 3.1134, + "theoretical_loss": 3.5089286703605698, + "tokens_seen": 1535115264 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022796065077563375, + "loss": 3.2115, + "theoretical_loss": 3.5087183736973437, + "tokens_seen": 1536163840 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.4853838384151459, + "objective/train/docs_used": 871030, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.866541624069214, + "objective/train/original_loss": 2.866541862487793, + "objective/train/theoretical_loss": 3.5085870315659133, + "objective/train/tokens_used": 1557279200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23986515402793884, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497575998306274, + "objective/train/weighted_lm_loss": 3.0102427005767822, + "objective/train/weights_max": 1.051215410232544, + "objective/train/weights_min": 0.9542617797851562, + "theoretical_loss": 3.5085870315659133, + "tokens_seen": 1536819200 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002277714718123345, + "loss": 3.223, + "theoretical_loss": 3.5085082606943243, + "tokens_seen": 1537212416 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002275822928490352, + "loss": 3.1393, + "theoretical_loss": 3.508298331065999, + "tokens_seen": 1538260992 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002273931138857359, + "loss": 3.1253, + "theoretical_loss": 3.508088584527492, + "tokens_seen": 1539309568 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.4807642698287964, + "objective/train/docs_used": 872615, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1632378101348877, + "objective/train/original_loss": 3.1632375717163086, + "objective/train/theoretical_loss": 3.507931394605294, + "objective/train/tokens_used": 1560556000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2371273934841156, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492817163467407, + "objective/train/weighted_lm_loss": 3.3185274600982666, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9520975351333618, + "theoretical_loss": 3.507931394605294, + "tokens_seen": 1540096000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022720393492243663, + "loss": 3.1095, + "theoretical_loss": 3.5078790207945647, + "tokens_seen": 1540358144 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022701475595913735, + "loss": 3.1162, + "theoretical_loss": 3.507669639583612, + "tokens_seen": 1541406720 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022682557699583807, + "loss": 3.1987, + "theoretical_loss": 3.5074604406116627, + "tokens_seen": 1542455296 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.49242639541625977, + "objective/train/docs_used": 874504, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1098110675811768, + "objective/train/original_loss": 3.1098108291625977, + "objective/train/theoretical_loss": 3.507277540782165, + "objective/train/tokens_used": 1563832800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24409788846969604, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504834651947021, + "objective/train/weighted_lm_loss": 3.2666797637939453, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9536768198013306, + "theoretical_loss": 3.507277540782165, + "tokens_seen": 1543372800 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002266363980325388, + "loss": 3.2216, + "theoretical_loss": 3.507251423596374, + "tokens_seen": 1543503872 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002264472190692395, + "loss": 3.1687, + "theoretical_loss": 3.5070425882560343, + "tokens_seen": 1544552448 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022625804010594023, + "loss": 3.2415, + "theoretical_loss": 3.506833934309558, + "tokens_seen": 1545601024 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.48686209321022034, + "objective/train/docs_used": 876453, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9117236137390137, + "objective/train/original_loss": 2.9117236137390137, + "objective/train/theoretical_loss": 3.5066254614764842, + "objective/train/tokens_used": 1567109600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24117165803909302, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499120950698853, + "objective/train/weighted_lm_loss": 3.0574209690093994, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9522051215171814, + "theoretical_loss": 3.5066254614764842, + "tokens_seen": 1546649600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022606886114264095, + "loss": 3.2028, + "theoretical_loss": 3.5066254614764842, + "tokens_seen": 1546649600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022587968217934164, + "loss": 3.2477, + "theoretical_loss": 3.5064171694769763, + "tokens_seen": 1547698176 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022569050321604237, + "loss": 3.2099, + "theoretical_loss": 3.5062090580318186, + "tokens_seen": 1548746752 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002255013242527431, + "loss": 3.232, + "theoretical_loss": 3.506001126862416, + "tokens_seen": 1549795328 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.487368643283844, + "objective/train/docs_used": 878255, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.103156566619873, + "objective/train/original_loss": 3.103156566619873, + "objective/train/theoretical_loss": 3.5059751481280284, + "objective/train/tokens_used": 1570386400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24058666825294495, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049959659576416, + "objective/train/weighted_lm_loss": 3.2585041522979736, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9520174860954285, + "theoretical_loss": 3.5059751481280284, + "tokens_seen": 1549926400 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022531214528944383, + "loss": 3.2565, + "theoretical_loss": 3.505793375690791, + "tokens_seen": 1550843904 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022512296632614455, + "loss": 3.1939, + "theoretical_loss": 3.5055858042395815, + "tokens_seen": 1551892480 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022493378736284525, + "loss": 3.155, + "theoretical_loss": 3.5053784122320417, + "tokens_seen": 1552941056 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.4909485876560211, + "objective/train/docs_used": 880445, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8173828125, + "objective/train/original_loss": 2.8173828125, + "objective/train/theoretical_loss": 3.505326592235857, + "objective/train/tokens_used": 1573663200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24372120201587677, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050333857536316, + "objective/train/weighted_lm_loss": 2.9595131874084473, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9670907855033875, + "theoretical_loss": 3.505326592235857, + "tokens_seen": 1553203200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022474460839954597, + "loss": 3.1608, + "theoretical_loss": 3.505171199392036, + "tokens_seen": 1553989632 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002245554294362467, + "loss": 3.1919, + "theoretical_loss": 3.504964165444042, + "tokens_seen": 1555038208 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022436625047294744, + "loss": 3.1549, + "theoretical_loss": 3.504757310113145, + "tokens_seen": 1556086784 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.4909573793411255, + "objective/train/docs_used": 881444, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.154583215713501, + "objective/train/original_loss": 3.15458345413208, + "objective/train/theoretical_loss": 3.504679785357773, + "objective/train/tokens_used": 1576940000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24360138177871704, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503339767456055, + "objective/train/weighted_lm_loss": 3.3135523796081543, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9545013904571533, + "theoretical_loss": 3.504679785357773, + "tokens_seen": 1556480000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022417707150964813, + "loss": 3.203, + "theoretical_loss": 3.5045506331250382, + "tokens_seen": 1557135360 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022398789254634885, + "loss": 3.1861, + "theoretical_loss": 3.50434413420602, + "tokens_seen": 1558183936 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022379871358304957, + "loss": 3.1586, + "theoretical_loss": 3.504137813082994, + "tokens_seen": 1559232512 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.47179633378982544, + "objective/train/docs_used": 883210, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7676148414611816, + "objective/train/original_loss": 2.7676148414611816, + "objective/train/theoretical_loss": 3.504034719109799, + "objective/train/tokens_used": 1580216800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23406490683555603, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04836905002594, + "objective/train/weighted_lm_loss": 2.9026639461517334, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9523040652275085, + "theoretical_loss": 3.504034719109799, + "tokens_seen": 1559756800 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002236095346197503, + "loss": 3.1069, + "theoretical_loss": 3.5039316694834635, + "tokens_seen": 1560281088 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022342035565645098, + "loss": 3.1383, + "theoretical_loss": 3.5037257031355344, + "tokens_seen": 1561329664 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022323117669315173, + "loss": 3.1619, + "theoretical_loss": 3.5035199137679105, + "tokens_seen": 1562378240 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.4887843728065491, + "objective/train/docs_used": 884815, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8651185035705566, + "objective/train/original_loss": 2.8651180267333984, + "objective/train/theoretical_loss": 3.50339138516565, + "objective/train/tokens_used": 1583493600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24095383286476135, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501031875610352, + "objective/train/weighted_lm_loss": 3.0087907314300537, + "objective/train/weights_max": 1.0512157678604126, + "objective/train/weights_min": 0.957360029220581, + "theoretical_loss": 3.50339138516565, + "tokens_seen": 1563033600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022304199772985245, + "loss": 3.234, + "theoretical_loss": 3.503314301109892, + "tokens_seen": 1563426816 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022285281876655317, + "loss": 3.1352, + "theoretical_loss": 3.5031088648913755, + "tokens_seen": 1564475392 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002226636398032539, + "loss": 3.2193, + "theoretical_loss": 3.5029036048428503, + "tokens_seen": 1565523968 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.48724234104156494, + "objective/train/docs_used": 886685, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.778803825378418, + "objective/train/original_loss": 2.778803825378418, + "objective/train/theoretical_loss": 3.5027497752562198, + "objective/train/tokens_used": 1586770400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23980683088302612, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499430894851685, + "objective/train/weighted_lm_loss": 2.9186336994171143, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9529886245727539, + "theoretical_loss": 3.5027497752562198, + "tokens_seen": 1566310400 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002224744608399546, + "loss": 3.1987, + "theoretical_loss": 3.502698520695398, + "tokens_seen": 1566572544 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002222852818766553, + "loss": 3.0677, + "theoretical_loss": 3.5024936121806896, + "tokens_seen": 1567621120 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022209610291335606, + "loss": 3.1452, + "theoretical_loss": 3.502288879030986, + "tokens_seen": 1568669696 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.4725601077079773, + "objective/train/docs_used": 888616, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0799059867858887, + "objective/train/original_loss": 3.0799059867858887, + "objective/train/theoretical_loss": 3.5021098811690674, + "objective/train/tokens_used": 1590047200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.233866885304451, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484446287155151, + "objective/train/weighted_lm_loss": 3.228604316711426, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9518508315086365, + "theoretical_loss": 3.5021098811690674, + "tokens_seen": 1569587200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022190692395005678, + "loss": 3.1714, + "theoretical_loss": 3.5020843209791326, + "tokens_seen": 1569718272 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022171774498675747, + "loss": 3.1314, + "theoretical_loss": 3.501879937758562, + "tokens_seen": 1570766848 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002215285660234582, + "loss": 3.1833, + "theoretical_loss": 3.5016757291032903, + "tokens_seen": 1571815424 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.48864632844924927, + "objective/train/docs_used": 890514, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.864938259124756, + "objective/train/original_loss": 2.864938259124756, + "objective/train/theoretical_loss": 3.501471694747913, + "objective/train/tokens_used": 1593324000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24329259991645813, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501012802124023, + "objective/train/weighted_lm_loss": 3.0083770751953125, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9517747163772583, + "theoretical_loss": 3.501471694747913, + "tokens_seen": 1572864000 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002213393870601589, + "loss": 3.1617, + "theoretical_loss": 3.501471694747913, + "tokens_seen": 1572864000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022115020809685963, + "loss": 3.2548, + "theoretical_loss": 3.5012678344276082, + "tokens_seen": 1573912576 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022096102913356035, + "loss": 3.1995, + "theoretical_loss": 3.5010641478781306, + "tokens_seen": 1574961152 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022077185017026107, + "loss": 3.2114, + "theoretical_loss": 3.5008606348358136, + "tokens_seen": 1576009728 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.4617477357387543, + "objective/train/docs_used": 892478, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0483834743499756, + "objective/train/original_loss": 3.0483834743499756, + "objective/train/theoretical_loss": 3.5008352078921368, + "objective/train/tokens_used": 1596600800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.227500781416893, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0473309755325317, + "objective/train/weighted_lm_loss": 3.1940760612487793, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9527806043624878, + "theoretical_loss": 3.5008352078921368, + "tokens_seen": 1576140800 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002205826712069618, + "loss": 3.2382, + "theoretical_loss": 3.5006572950375645, + "tokens_seen": 1577058304 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002203934922436625, + "loss": 3.1722, + "theoretical_loss": 3.5004541282208637, + "tokens_seen": 1578106880 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022020431328036323, + "loss": 3.1883, + "theoretical_loss": 3.500251134123765, + "tokens_seen": 1579155456 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.48032376170158386, + "objective/train/docs_used": 894462, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.296074390411377, + "objective/train/original_loss": 3.296074390411377, + "objective/train/theoretical_loss": 3.5002004125562856, + "objective/train/tokens_used": 1599877600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23856282234191895, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492448806762695, + "objective/train/weighted_lm_loss": 3.4574201107025146, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9516441226005554, + "theoretical_loss": 3.5002004125562856, + "tokens_seen": 1579417600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00022001513431706393, + "loss": 3.2023, + "theoretical_loss": 3.500048312484891, + "tokens_seen": 1580204032 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021982595535376468, + "loss": 3.2244, + "theoretical_loss": 3.4998456630434336, + "tokens_seen": 1581252608 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002196367763904654, + "loss": 3.1959, + "theoretical_loss": 3.499643185539152, + "tokens_seen": 1582301184 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.47416922450065613, + "objective/train/docs_used": 896326, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.748246431350708, + "objective/train/original_loss": 2.748246431350708, + "objective/train/theoretical_loss": 3.4995673007495816, + "objective/train/tokens_used": 1603154400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2362240105867386, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486173629760742, + "objective/train/weighted_lm_loss": 2.8795387744903564, + "objective/train/weights_max": 1.0512193441390991, + "objective/train/weights_min": 0.951950192451477, + "theoretical_loss": 3.4995673007495816, + "tokens_seen": 1582694400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021944759742716612, + "loss": 3.173, + "theoretical_loss": 3.4994408797123704, + "tokens_seen": 1583349760 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002192584184638668, + "loss": 3.1532, + "theoretical_loss": 3.499238745303977, + "tokens_seen": 1584398336 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021906923950056753, + "loss": 3.163, + "theoretical_loss": 3.4990367820554216, + "tokens_seen": 1585446912 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.48116692900657654, + "objective/train/docs_used": 897668, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.866941213607788, + "objective/train/original_loss": 2.866940975189209, + "objective/train/theoretical_loss": 3.49893586453544, + "objective/train/tokens_used": 1606431200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23899151384830475, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493313074111938, + "objective/train/weighted_lm_loss": 3.009093999862671, + "objective/train/weights_max": 1.051215410232544, + "objective/train/weights_min": 0.951438844203949, + "theoretical_loss": 3.49893586453544, + "tokens_seen": 1585971200 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021888006053726825, + "loss": 3.1054, + "theoretical_loss": 3.498834989708716, + "tokens_seen": 1586495488 + }, + { + "epoch": 0.57, + "learning_rate": 0.000218690881573969, + "loss": 3.1044, + "theoretical_loss": 3.4986333680064297, + "tokens_seen": 1587544064 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021850170261066972, + "loss": 3.1573, + "theoretical_loss": 3.4984319166916906, + "tokens_seen": 1588592640 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.44459864497184753, + "objective/train/docs_used": 899481, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.089205503463745, + "objective/train/original_loss": 3.089205503463745, + "objective/train/theoretical_loss": 3.4983060960309915, + "objective/train/tokens_used": 1609708000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.233369842171669, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.045644998550415, + "objective/train/weighted_lm_loss": 3.2272956371307373, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9539871215820312, + "theoretical_loss": 3.4983060960309915, + "tokens_seen": 1589248000 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002183125236473704, + "loss": 3.099, + "theoretical_loss": 3.4982306355081825, + "tokens_seen": 1589641216 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021812334468407113, + "loss": 3.1155, + "theoretical_loss": 3.4980295242001422, + "tokens_seen": 1590689792 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021793416572077185, + "loss": 3.1532, + "theoretical_loss": 3.497828582512361, + "tokens_seen": 1591738368 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.4910542070865631, + "objective/train/docs_used": 902104, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7395198345184326, + "objective/train/original_loss": 2.7395195960998535, + "objective/train/theoretical_loss": 3.4976779874066066, + "objective/train/tokens_used": 1612984800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24307867884635925, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503411293029785, + "objective/train/weighted_lm_loss": 2.8776791095733643, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.95230633020401, + "theoretical_loss": 3.4976779874066066, + "tokens_seen": 1592524800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021774498675747257, + "loss": 3.0806, + "theoretical_loss": 3.4976278101901803, + "tokens_seen": 1592786944 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002175558077941733, + "loss": 3.1768, + "theoretical_loss": 3.4974272069794914, + "tokens_seen": 1593835520 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021736662883087402, + "loss": 3.1855, + "theoretical_loss": 3.4972267726267336, + "tokens_seen": 1594884096 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.48677858710289, + "objective/train/docs_used": 903830, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6319079399108887, + "objective/train/original_loss": 2.6319079399108887, + "objective/train/theoretical_loss": 3.497051530885427, + "objective/train/tokens_used": 1616261600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23925882577896118, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498939752578735, + "objective/train/weighted_lm_loss": 2.763871192932129, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9580140113830566, + "theoretical_loss": 3.497051530885427, + "tokens_seen": 1595801600 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021717744986757474, + "loss": 3.1649, + "theoretical_loss": 3.4970265068788944, + "tokens_seen": 1595932672 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021698827090427546, + "loss": 3.1523, + "theoretical_loss": 3.4968264094835027, + "tokens_seen": 1596981248 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021679909194097615, + "loss": 3.1599, + "theoretical_loss": 3.4966264801886346, + "tokens_seen": 1598029824 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.49028122425079346, + "objective/train/docs_used": 906083, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0217175483703613, + "objective/train/original_loss": 3.0217177867889404, + "objective/train/theoretical_loss": 3.4964267187429066, + "objective/train/tokens_used": 1619538400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24274671077728271, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502620935440063, + "objective/train/weighted_lm_loss": 3.173563003540039, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9540677070617676, + "theoretical_loss": 3.4964267187429066, + "tokens_seen": 1599078400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021660991297767687, + "loss": 3.1587, + "theoretical_loss": 3.4964267187429066, + "tokens_seen": 1599078400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021642073401437762, + "loss": 3.134, + "theoretical_loss": 3.4962271248954755, + "tokens_seen": 1600126976 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021623155505107834, + "loss": 3.1665, + "theoretical_loss": 3.4960276983960368, + "tokens_seen": 1601175552 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021604237608777906, + "loss": 3.0887, + "theoretical_loss": 3.495828438994824, + "tokens_seen": 1602224128 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.4898524582386017, + "objective/train/docs_used": 907881, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.784710645675659, + "objective/train/original_loss": 2.784710168838501, + "objective/train/theoretical_loss": 3.495803543306348, + "objective/train/tokens_used": 1622815200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2425891011953354, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502182245254517, + "objective/train/weighted_lm_loss": 2.924830436706543, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.95200514793396, + "theoretical_loss": 3.495803543306348, + "tokens_seen": 1602355200 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021585319712447975, + "loss": 3.1445, + "theoretical_loss": 3.495629346442607, + "tokens_seen": 1603272704 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021566401816118047, + "loss": 3.1624, + "theoretical_loss": 3.4954304204906896, + "tokens_seen": 1604321280 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002154748391978812, + "loss": 3.1413, + "theoretical_loss": 3.4952316608909078, + "tokens_seen": 1605369856 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.4899497926235199, + "objective/train/docs_used": 909847, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2979307174682617, + "objective/train/original_loss": 3.2979307174682617, + "objective/train/theoretical_loss": 3.495181996954453, + "objective/train/tokens_used": 1626092000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24249549210071564, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502275228500366, + "objective/train/weighted_lm_loss": 3.4631145000457764, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9523872137069702, + "theoretical_loss": 3.495181996954453, + "tokens_seen": 1605632000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021528566023458194, + "loss": 3.1345, + "theoretical_loss": 3.49503306739563, + "tokens_seen": 1606418432 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021509648127128263, + "loss": 3.078, + "theoretical_loss": 3.4948346397577543, + "tokens_seen": 1607467008 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021490730230798336, + "loss": 3.1708, + "theoretical_loss": 3.4946363777307075, + "tokens_seen": 1608515584 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.4887382686138153, + "objective/train/docs_used": 911828, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.157907247543335, + "objective/train/original_loss": 3.1579067707061768, + "objective/train/theoretical_loss": 3.4945620721168713, + "objective/train/tokens_used": 1629368800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24136415123939514, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501006841659546, + "objective/train/weighted_lm_loss": 3.3168606758117676, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.951468825340271, + "theoretical_loss": 3.4945620721168713, + "tokens_seen": 1608908800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00021471812334468408, + "loss": 3.1846, + "theoretical_loss": 3.494438281068443, + "tokens_seen": 1609564160 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002145289443813848, + "loss": 3.1879, + "theoretical_loss": 3.4942403495254393, + "tokens_seen": 1610612736 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002143397654180855, + "loss": 3.1535, + "theoretical_loss": 3.4940425828567, + "tokens_seen": 1611661312 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.4839933216571808, + "objective/train/docs_used": 913095, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9910757541656494, + "objective/train/original_loss": 2.9910759925842285, + "objective/train/theoretical_loss": 3.493943761273761, + "objective/train/tokens_used": 1632645600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23838478326797485, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04961097240448, + "objective/train/weighted_lm_loss": 3.1397407054901123, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9523307681083679, + "theoretical_loss": 3.493943761273761, + "tokens_seen": 1612185600 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021415058645478624, + "loss": 3.1622, + "theoretical_loss": 3.4938449808177516, + "tokens_seen": 1612709888 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021396140749148696, + "loss": 3.1853, + "theoretical_loss": 3.4936475431646397, + "tokens_seen": 1613758464 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021377222852818768, + "loss": 3.2193, + "theoretical_loss": 3.4934502696539322, + "tokens_seen": 1614807040 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.46676307916641235, + "objective/train/docs_used": 915136, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3160529136657715, + "objective/train/original_loss": 3.3160529136657715, + "objective/train/theoretical_loss": 3.493327056955347, + "objective/train/tokens_used": 1635922400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23430095613002777, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0478670597076416, + "objective/train/weighted_lm_loss": 3.474302291870117, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9522300958633423, + "theoretical_loss": 3.493327056955347, + "tokens_seen": 1615462400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002135830495648884, + "loss": 3.1691, + "theoretical_loss": 3.493253160042713, + "tokens_seen": 1615855616 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002133938706015891, + "loss": 3.1744, + "theoretical_loss": 3.4930562140885844, + "tokens_seen": 1616904192 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002132046916382898, + "loss": 3.1364, + "theoretical_loss": 3.492859431549663, + "tokens_seen": 1617952768 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.4899651110172272, + "objective/train/docs_used": 916739, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3264360427856445, + "objective/train/original_loss": 3.3264360427856445, + "objective/train/theoretical_loss": 3.4927119517414846, + "objective/train/tokens_used": 1639199200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2438695728778839, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502362251281738, + "objective/train/weighted_lm_loss": 3.493744373321533, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.951538622379303, + "theoretical_loss": 3.4927119517414846, + "tokens_seen": 1618739200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021301551267499056, + "loss": 3.1957, + "theoretical_loss": 3.49266281218458, + "tokens_seen": 1619001344 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021282633371169128, + "loss": 3.2048, + "theoretical_loss": 3.49246635575248, + "tokens_seen": 1620049920 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021263715474839197, + "loss": 3.1662, + "theoretical_loss": 3.4922700620130174, + "tokens_seen": 1621098496 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.4800923466682434, + "objective/train/docs_used": 918332, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.748319625854492, + "objective/train/original_loss": 2.748319625854492, + "objective/train/theoretical_loss": 3.4920984382612357, + "objective/train/tokens_used": 1642476000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23431384563446045, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049200177192688, + "objective/train/weighted_lm_loss": 2.885237693786621, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9522053599357605, + "theoretical_loss": 3.4920984382612357, + "tokens_seen": 1622016000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002124479757850927, + "loss": 3.1469, + "theoretical_loss": 3.492073930726355, + "tokens_seen": 1622147072 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021225879682179342, + "loss": 3.1629, + "theoretical_loss": 3.491877961653168, + "tokens_seen": 1623195648 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021206961785849414, + "loss": 3.1635, + "theoretical_loss": 3.4916821545546344, + "tokens_seen": 1624244224 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.4663797616958618, + "objective/train/docs_used": 920327, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1265220642089844, + "objective/train/original_loss": 3.1265220642089844, + "objective/train/theoretical_loss": 3.4914865091924394, + "objective/train/tokens_used": 1645752800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2259867787361145, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0477863550186157, + "objective/train/weighted_lm_loss": 3.2764687538146973, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9532047510147095, + "theoretical_loss": 3.4914865091924394, + "tokens_seen": 1625292800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021188043889519488, + "loss": 3.1512, + "theoretical_loss": 3.4914865091924394, + "tokens_seen": 1625292800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021169125993189558, + "loss": 3.2119, + "theoretical_loss": 3.4912910253287732, + "tokens_seen": 1626341376 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002115020809685963, + "loss": 3.2025, + "theoretical_loss": 3.4910957027263274, + "tokens_seen": 1627389952 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021131290200529702, + "loss": 3.1581, + "theoretical_loss": 3.490900541148295, + "tokens_seen": 1628438528 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.4907952547073364, + "objective/train/docs_used": 922281, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.313007354736328, + "objective/train/original_loss": 3.313007354736328, + "objective/train/theoretical_loss": 3.4908761572612947, + "objective/train/tokens_used": 1649029600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24451680481433868, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050322413444519, + "objective/train/weighted_lm_loss": 3.4795234203338623, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9518669247627258, + "theoretical_loss": 3.4908761572612947, + "tokens_seen": 1628569600 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021112372304199774, + "loss": 3.0487, + "theoretical_loss": 3.490705540358369, + "tokens_seen": 1629487104 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021093454407869843, + "loss": 3.1641, + "theoretical_loss": 3.4905107001207414, + "tokens_seen": 1630535680 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021074536511539918, + "loss": 3.0879, + "theoretical_loss": 3.490316020200101, + "tokens_seen": 1631584256 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.4708525240421295, + "objective/train/docs_used": 923944, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9695937633514404, + "objective/train/original_loss": 2.9695937633514404, + "objective/train/theoretical_loss": 3.4902673752419417, + "objective/train/tokens_used": 1652306400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23232224583625793, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048266053199768, + "objective/train/weighted_lm_loss": 3.110866069793701, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9532876014709473, + "theoretical_loss": 3.4902673752419417, + "tokens_seen": 1631846400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002105561861520999, + "loss": 3.1031, + "theoretical_loss": 3.4901215003616333, + "tokens_seen": 1632632832 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021036700718880062, + "loss": 3.0844, + "theoretical_loss": 3.4899271403710164, + "tokens_seen": 1633681408 + }, + { + "epoch": 0.58, + "learning_rate": 0.00021017782822550131, + "loss": 3.1156, + "theoretical_loss": 3.4897329399944237, + "tokens_seen": 1634729984 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.48439714312553406, + "objective/train/docs_used": 925798, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8560261726379395, + "objective/train/original_loss": 2.8560264110565186, + "objective/train/theoretical_loss": 3.4896601559560523, + "objective/train/tokens_used": 1655583200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23803523182868958, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496495962142944, + "objective/train/weighted_lm_loss": 2.9979634284973145, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9529846906661987, + "theoretical_loss": 3.4896601559560523, + "tokens_seen": 1635123200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020998864926220204, + "loss": 3.0956, + "theoretical_loss": 3.4895388989985188, + "tokens_seen": 1635778560 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020979947029890276, + "loss": 3.111, + "theoretical_loss": 3.4893450171504563, + "tokens_seen": 1636827136 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002096102913356035, + "loss": 3.0926, + "theoretical_loss": 3.4891512942178795, + "tokens_seen": 1637875712 + }, + { + "debugging/Self-BLEU-5": 0.4862537472725578, + "debugging/distinct-1-grams": 0.7771784690107355, + "debugging/distinct-2-grams": 0.9668669456652452, + "debugging/entropy-1-grams": 6.202714981817353, + "debugging/entropy-2-grams": 7.211883548596637, + "debugging/length": 471.0, + "debugging/num_segments": 21, + "debugging/raw_token_scores_avg": 0.013972360640764236, + "debugging/raw_token_scores_std": 0.06252207607030869, + "epoch": 0.59, + "objective/train/advantage_avg": 0.4860203266143799, + "objective/train/docs_used": 927402, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2737643718719482, + "objective/train/original_loss": 3.2737646102905273, + "objective/train/theoretical_loss": 3.4890544922724205, + "objective/train/tokens_used": 1658860000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2401282638311386, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498225688934326, + "objective/train/weighted_lm_loss": 3.4373350143432617, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9551194906234741, + "theoretical_loss": 3.4890544922724205, + "tokens_seen": 1638400000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020942111237230422, + "loss": 3.1852, + "theoretical_loss": 3.48895772996892, + "tokens_seen": 1638924288 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020923193340900492, + "loss": 3.1467, + "theoretical_loss": 3.4887643241721955, + "tokens_seen": 1639972864 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020904275444570564, + "loss": 3.09, + "theoretical_loss": 3.4885710765968088, + "tokens_seen": 1641021440 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.48904502391815186, + "objective/train/docs_used": 928835, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1367592811584473, + "objective/train/original_loss": 3.1367592811584473, + "objective/train/theoretical_loss": 3.4884503771065636, + "objective/train/tokens_used": 1662136800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24231131374835968, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501362085342407, + "objective/train/weighted_lm_loss": 3.2936394214630127, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9644740223884583, + "theoretical_loss": 3.4884503771065636, + "tokens_seen": 1641676800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020885357548240636, + "loss": 3.0997, + "theoretical_loss": 3.4883779870123455, + "tokens_seen": 1642070016 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020866439651910708, + "loss": 3.1374, + "theoretical_loss": 3.488185055188876, + "tokens_seen": 1643118592 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002084752175558078, + "loss": 3.1162, + "theoretical_loss": 3.4879922808969486, + "tokens_seen": 1644167168 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.4872593283653259, + "objective/train/docs_used": 930288, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.094754695892334, + "objective/train/original_loss": 3.094754457473755, + "objective/train/theoretical_loss": 3.48784780342032, + "objective/train/tokens_used": 1665413600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24150964617729187, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049953579902649, + "objective/train/weighted_lm_loss": 3.2483839988708496, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9528509378433228, + "theoretical_loss": 3.48784780342032, + "tokens_seen": 1644953600 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020828603859250852, + "loss": 3.1739, + "theoretical_loss": 3.4877996639075937, + "tokens_seen": 1645215744 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020809685962920924, + "loss": 3.1529, + "theoretical_loss": 3.4876072039923196, + "tokens_seen": 1646264320 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020790768066590996, + "loss": 3.1419, + "theoretical_loss": 3.4874149009231123, + "tokens_seen": 1647312896 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.4832485616207123, + "objective/train/docs_used": 932466, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.70462965965271, + "objective/train/original_loss": 2.704629898071289, + "objective/train/theoretical_loss": 3.4872467642214566, + "objective/train/tokens_used": 1668690400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23652879893779755, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495271682739258, + "objective/train/weighted_lm_loss": 2.8395895957946777, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9560015797615051, + "theoretical_loss": 3.4872467642214566, + "tokens_seen": 1648230400 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020771850170261068, + "loss": 3.1129, + "theoretical_loss": 3.4872227544724312, + "tokens_seen": 1648361472 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020752932273931138, + "loss": 3.1736, + "theoretical_loss": 3.487030764413214, + "tokens_seen": 1649410048 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020734014377601212, + "loss": 3.1707, + "theoretical_loss": 3.4868389305188687, + "tokens_seen": 1650458624 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.48778533935546875, + "objective/train/docs_used": 934348, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9127755165100098, + "objective/train/original_loss": 2.9127755165100098, + "objective/train/theoretical_loss": 3.4866472525632766, + "objective/train/tokens_used": 1671967200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24155890941619873, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.05000638961792, + "objective/train/weighted_lm_loss": 3.0580127239227295, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9531963467597961, + "theoretical_loss": 3.4866472525632766, + "tokens_seen": 1651507200 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020715096481271284, + "loss": 3.096, + "theoretical_loss": 3.4866472525632766, + "tokens_seen": 1651507200 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020696178584941356, + "loss": 3.1461, + "theoretical_loss": 3.486455730320789, + "tokens_seen": 1652555776 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020677260688611426, + "loss": 3.0677, + "theoretical_loss": 3.486264363566228, + "tokens_seen": 1653604352 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020658342792281498, + "loss": 3.1543, + "theoretical_loss": 3.4860731520748827, + "tokens_seen": 1654652928 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.47505781054496765, + "objective/train/docs_used": 936266, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9631354808807373, + "objective/train/original_loss": 2.963135242462158, + "objective/train/theoretical_loss": 3.4860492615442356, + "objective/train/tokens_used": 1675244000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23247912526130676, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04868745803833, + "objective/train/weighted_lm_loss": 3.1075222492218018, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9551524519920349, + "theoretical_loss": 3.4860492615442356, + "tokens_seen": 1654784000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002063942489595157, + "loss": 3.0798, + "theoretical_loss": 3.4858820956225083, + "tokens_seen": 1655701504 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020620506999621645, + "loss": 3.0016, + "theoretical_loss": 3.4856911939853283, + "tokens_seen": 1656750080 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020601589103291714, + "loss": 3.1524, + "theoretical_loss": 3.485500446940028, + "tokens_seen": 1657798656 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.4916464686393738, + "objective/train/docs_used": 938319, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8522729873657227, + "objective/train/original_loss": 2.8522729873657227, + "objective/train/theoretical_loss": 3.485452784307559, + "objective/train/tokens_used": 1678520800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24374902248382568, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504037141799927, + "objective/train/weighted_lm_loss": 2.996385335922241, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.994926393032074, + "theoretical_loss": 3.485452784307559, + "tokens_seen": 1658060800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020582671206961786, + "loss": 3.0695, + "theoretical_loss": 3.4853098542637566, + "tokens_seen": 1658847232 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020563753310631858, + "loss": 3.0393, + "theoretical_loss": 3.4851194157341263, + "tokens_seen": 1659895808 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002054483541430193, + "loss": 3.1038, + "theoretical_loss": 3.484929131129207, + "tokens_seen": 1660944384 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.48630455136299133, + "objective/train/docs_used": 940185, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.038881540298462, + "objective/train/original_loss": 3.038881301879883, + "objective/train/theoretical_loss": 3.4848578140408613, + "objective/train/tokens_used": 1681797600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24110354483127594, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498559474945068, + "objective/train/weighted_lm_loss": 3.1900389194488525, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9526039958000183, + "theoretical_loss": 3.4848578140408613, + "tokens_seen": 1661337600 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020525917517972002, + "loss": 3.1271, + "theoretical_loss": 3.484739000227532, + "tokens_seen": 1661992960 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020506999621642074, + "loss": 3.1116, + "theoretical_loss": 3.48454902280809, + "tokens_seen": 1663041536 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020488081725312146, + "loss": 3.106, + "theoretical_loss": 3.484359198650326, + "tokens_seen": 1664090112 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.46543005108833313, + "objective/train/docs_used": 942299, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8884527683258057, + "objective/train/original_loss": 2.8884522914886475, + "objective/train/theoretical_loss": 3.4842643439757754, + "objective/train/tokens_used": 1685074400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2315172702074051, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0477193593978882, + "objective/train/weighted_lm_loss": 3.0250158309936523, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9514710903167725, + "theoretical_loss": 3.4842643439757754, + "tokens_seen": 1664614400 + }, + { + "epoch": 0.59, + "learning_rate": 0.00020469163828982218, + "loss": 3.097, + "theoretical_loss": 3.484169527534143, + "tokens_seen": 1665138688 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002045024593265229, + "loss": 3.0913, + "theoretical_loss": 3.4839800092398967, + "tokens_seen": 1666187264 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002043132803632236, + "loss": 3.1178, + "theoretical_loss": 3.4837906435483967, + "tokens_seen": 1667235840 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.49083784222602844, + "objective/train/docs_used": 944133, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.160414695739746, + "objective/train/original_loss": 3.160414218902588, + "objective/train/theoretical_loss": 3.4836723673875793, + "objective/train/tokens_used": 1688351200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2420562356710434, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.05031418800354, + "objective/train/weighted_lm_loss": 3.320441484451294, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9800860285758972, + "theoretical_loss": 3.4836723673875793, + "tokens_seen": 1667891200 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020412410139992432, + "loss": 3.1704, + "theoretical_loss": 3.4836014302409053, + "tokens_seen": 1668284416 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020393492243662507, + "loss": 3.1694, + "theoretical_loss": 3.4834123690991334, + "tokens_seen": 1669332992 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020374574347332579, + "loss": 3.1643, + "theoretical_loss": 3.4832234599052434, + "tokens_seen": 1670381568 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.48427098989486694, + "objective/train/docs_used": 945841, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.058107852935791, + "objective/train/original_loss": 3.05810809135437, + "objective/train/theoretical_loss": 3.4830818775948327, + "objective/train/tokens_used": 1691628000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2393985241651535, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049643874168396, + "objective/train/weighted_lm_loss": 3.209540843963623, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9518405795097351, + "theoretical_loss": 3.4830818775948327, + "tokens_seen": 1671168000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020355656451002648, + "loss": 3.1219, + "theoretical_loss": 3.483034702441845, + "tokens_seen": 1671430144 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002033673855467272, + "loss": 3.178, + "theoretical_loss": 3.4828460964919965, + "tokens_seen": 1672478720 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020317820658342792, + "loss": 3.1241, + "theoretical_loss": 3.4826576418392, + "tokens_seen": 1673527296 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.4750880002975464, + "objective/train/docs_used": 947805, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0682384967803955, + "objective/train/original_loss": 3.0682387351989746, + "objective/train/theoretical_loss": 3.48249286795901, + "objective/train/tokens_used": 1694904800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23441539704799652, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048700213432312, + "objective/train/weighted_lm_loss": 3.2168827056884766, + "objective/train/weights_max": 1.0512195825576782, + "objective/train/weights_min": 0.9514750838279724, + "theoretical_loss": 3.48249286795901, + "tokens_seen": 1674444800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020298902762012864, + "loss": 3.1192, + "theoretical_loss": 3.4824693382674043, + "tokens_seen": 1674575872 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002027998486568294, + "loss": 3.1398, + "theoretical_loss": 3.4822811855610007, + "tokens_seen": 1675624448 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020261066969353008, + "loss": 3.1768, + "theoretical_loss": 3.4820931835048223, + "tokens_seen": 1676673024 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.48598864674568176, + "objective/train/docs_used": 948902, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.975940227508545, + "objective/train/original_loss": 2.975940704345703, + "objective/train/theoretical_loss": 3.4819053318841453, + "objective/train/tokens_used": 1698181600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2399335503578186, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049818515777588, + "objective/train/weighted_lm_loss": 3.1240224838256836, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9528480172157288, + "theoretical_loss": 3.4819053318841453, + "tokens_seen": 1677721600 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002024214907302308, + "loss": 3.2016, + "theoretical_loss": 3.4819053318841453, + "tokens_seen": 1677721600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020223231176693152, + "loss": 3.1829, + "theoretical_loss": 3.481717630484684, + "tokens_seen": 1678770176 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020204313280363224, + "loss": 3.1091, + "theoretical_loss": 3.481530079092593, + "tokens_seen": 1679818752 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020185395384033294, + "loss": 3.1407, + "theoretical_loss": 3.481342677494464, + "tokens_seen": 1680867328 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.47322916984558105, + "objective/train/docs_used": 950589, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0295519828796387, + "objective/train/original_loss": 3.0295517444610596, + "objective/train/theoretical_loss": 3.481319262816474, + "objective/train/tokens_used": 1701458400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23179136216640472, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485007762908936, + "objective/train/weighted_lm_loss": 3.177227258682251, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9534991383552551, + "theoretical_loss": 3.481319262816474, + "tokens_seen": 1680998400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020166477487703369, + "loss": 3.159, + "theoretical_loss": 3.4811554254773243, + "tokens_seen": 1681915904 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002014755959137344, + "loss": 3.0974, + "theoretical_loss": 3.4809683228286374, + "tokens_seen": 1682964480 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020128641695043513, + "loss": 3.1729, + "theoretical_loss": 3.4807813693363023, + "tokens_seen": 1684013056 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.4855785071849823, + "objective/train/docs_used": 952447, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7886948585510254, + "objective/train/original_loss": 2.7886953353881836, + "objective/train/theoretical_loss": 3.4807346542440833, + "objective/train/tokens_used": 1704735200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23818425834178925, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497686862945557, + "objective/train/weighted_lm_loss": 2.927842855453491, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9933276176452637, + "theoretical_loss": 3.4807346542440833, + "tokens_seen": 1684275200 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020109723798713585, + "loss": 3.1108, + "theoretical_loss": 3.480594564788648, + "tokens_seen": 1685061632 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020090805902383654, + "loss": 3.1693, + "theoretical_loss": 3.4804079089744375, + "tokens_seen": 1686110208 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020071888006053726, + "loss": 3.2593, + "theoretical_loss": 3.4802214016828636, + "tokens_seen": 1687158784 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.4896550476551056, + "objective/train/docs_used": 954180, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.044297218322754, + "objective/train/original_loss": 3.044297218322754, + "objective/train/theoretical_loss": 3.4801514996965643, + "objective/train/tokens_used": 1708012000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24202604591846466, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501958131790161, + "objective/train/weighted_lm_loss": 3.196167230606079, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9804592728614807, + "theoretical_loss": 3.4801514996965643, + "tokens_seen": 1687552000 + }, + { + "epoch": 0.6, + "learning_rate": 0.000200529701097238, + "loss": 3.1462, + "theoretical_loss": 3.48003504270355, + "tokens_seen": 1688207360 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020034052213393873, + "loss": 3.1513, + "theoretical_loss": 3.4798488318265477, + "tokens_seen": 1689255936 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020015134317063942, + "loss": 3.1753, + "theoretical_loss": 3.479662768842334, + "tokens_seen": 1690304512 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.4866858124732971, + "objective/train/docs_used": 956253, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9044716358184814, + "objective/train/original_loss": 2.9044711589813232, + "objective/train/theoretical_loss": 3.4795697927446643, + "objective/train/tokens_used": 1711288800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2399260550737381, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049888253211975, + "objective/train/weighted_lm_loss": 3.0500967502593994, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9630663990974426, + "theoretical_loss": 3.4795697927446643, + "tokens_seen": 1690828800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019996216420734014, + "loss": 3.1323, + "theoretical_loss": 3.4794768535418146, + "tokens_seen": 1691353088 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019977298524404086, + "loss": 3.1002, + "theoretical_loss": 3.4792910857163193, + "tokens_seen": 1692401664 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019958380628074158, + "loss": 3.0834, + "theoretical_loss": 3.4791054651576006, + "tokens_seen": 1693450240 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.48621219396591187, + "objective/train/docs_used": 958497, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7586536407470703, + "objective/train/original_loss": 2.7586538791656494, + "objective/train/theoretical_loss": 3.4789895269999507, + "objective/train/tokens_used": 1714565600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24361838400363922, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498595237731934, + "objective/train/weighted_lm_loss": 2.896075963973999, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9519256353378296, + "theoretical_loss": 3.4789895269999507, + "tokens_seen": 1694105600 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001993946273174423, + "loss": 3.1498, + "theoretical_loss": 3.4789199916578353, + "tokens_seen": 1694498816 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019920544835414303, + "loss": 3.2399, + "theoretical_loss": 3.478734665009622, + "tokens_seen": 1695547392 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019901626939084375, + "loss": 3.2016, + "theoretical_loss": 3.4785494850059786, + "tokens_seen": 1696595968 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.4836694300174713, + "objective/train/docs_used": 960356, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7970266342163086, + "objective/train/original_loss": 2.7970266342163086, + "objective/train/theoretical_loss": 3.478410696114469, + "objective/train/tokens_used": 1717842400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2396220862865448, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495851039886475, + "objective/train/weighted_lm_loss": 2.934868097305298, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9527864456176758, + "theoretical_loss": 3.478410696114469, + "tokens_seen": 1697382400 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019882709042754447, + "loss": 3.183, + "theoretical_loss": 3.478364451440343, + "tokens_seen": 1697644544 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001986379114642452, + "loss": 3.1736, + "theoretical_loss": 3.478179564106571, + "tokens_seen": 1698693120 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019844873250094588, + "loss": 3.1614, + "theoretical_loss": 3.4779948227989372, + "tokens_seen": 1699741696 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.4798791706562042, + "objective/train/docs_used": 961887, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2315726280212402, + "objective/train/original_loss": 3.231572151184082, + "objective/train/theoretical_loss": 3.477833293780412, + "objective/train/tokens_used": 1721119200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2355627715587616, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491852760314941, + "objective/train/weighted_lm_loss": 3.3893630504608154, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9673263430595398, + "theoretical_loss": 3.477833293780412, + "tokens_seen": 1700659200 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019825955353764663, + "loss": 3.1208, + "theoretical_loss": 3.47781022731213, + "tokens_seen": 1700790272 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019807037457434735, + "loss": 3.1911, + "theoretical_loss": 3.4776257774412547, + "tokens_seen": 1701838848 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019788119561104807, + "loss": 3.1836, + "theoretical_loss": 3.4774414729818295, + "tokens_seen": 1702887424 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.4749166965484619, + "objective/train/docs_used": 963663, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2296407222747803, + "objective/train/original_loss": 3.2296409606933594, + "objective/train/theoretical_loss": 3.477257313729786, + "objective/train/tokens_used": 1724396000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23224613070487976, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048672080039978, + "objective/train/weighted_lm_loss": 3.385560989379883, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9597051739692688, + "theoretical_loss": 3.477257313729786, + "tokens_seen": 1703936000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019769201664774876, + "loss": 3.2243, + "theoretical_loss": 3.477257313729786, + "tokens_seen": 1703936000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019750283768444948, + "loss": 3.1611, + "theoretical_loss": 3.477073299481467, + "tokens_seen": 1704984576 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001973136587211502, + "loss": 3.178, + "theoretical_loss": 3.4768894300336264, + "tokens_seen": 1706033152 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019712447975785095, + "loss": 3.0676, + "theoretical_loss": 3.476705705183427, + "tokens_seen": 1707081728 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.4770734906196594, + "objective/train/docs_used": 965784, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9515833854675293, + "objective/train/original_loss": 2.951582908630371, + "objective/train/theoretical_loss": 3.4766827497340875, + "objective/train/tokens_used": 1727672800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23381619155406952, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488957166671753, + "objective/train/weighted_lm_loss": 3.0978763103485107, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9522743225097656, + "theoretical_loss": 3.4766827497340875, + "tokens_seen": 1707212800 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019693530079455167, + "loss": 3.1771, + "theoretical_loss": 3.4765221247284415, + "tokens_seen": 1708130304 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019674612183125237, + "loss": 3.0724, + "theoretical_loss": 3.4763386884666483, + "tokens_seen": 1709178880 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019655694286795309, + "loss": 3.1534, + "theoretical_loss": 3.4761553961964338, + "tokens_seen": 1710227456 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.49260079860687256, + "objective/train/docs_used": 967686, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8040316104888916, + "objective/train/original_loss": 2.8040313720703125, + "objective/train/theoretical_loss": 3.476109595603976, + "objective/train/tokens_used": 1730949600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2447163164615631, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505040884017944, + "objective/train/weighted_lm_loss": 2.9455554485321045, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9516199231147766, + "theoretical_loss": 3.476109595603976, + "tokens_seen": 1710489600 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001963677639046538, + "loss": 3.1436, + "theoretical_loss": 3.475972247716588, + "tokens_seen": 1711276032 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019617858494135453, + "loss": 3.0968, + "theoretical_loss": 3.475789242826307, + "tokens_seen": 1712324608 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019598940597805525, + "loss": 3.1155, + "theoretical_loss": 3.4756063813251883, + "tokens_seen": 1713373184 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.48203980922698975, + "objective/train/docs_used": 969481, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.023247241973877, + "objective/train/original_loss": 3.023247241973877, + "objective/train/theoretical_loss": 3.475537845188954, + "objective/train/tokens_used": 1734226400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23657125234603882, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494064092636108, + "objective/train/weighted_lm_loss": 3.1723108291625977, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9527132511138916, + "theoretical_loss": 3.475537845188954, + "tokens_seen": 1713766400 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019580022701475597, + "loss": 3.1438, + "theoretical_loss": 3.4754236630132325, + "tokens_seen": 1714421760 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001956110480514567, + "loss": 3.1437, + "theoretical_loss": 3.4752410876908413, + "tokens_seen": 1715470336 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001954218690881574, + "loss": 3.1662, + "theoretical_loss": 3.475058655158816, + "tokens_seen": 1716518912 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.46607887744903564, + "objective/train/docs_used": 970822, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1122360229492188, + "objective/train/original_loss": 3.1122357845306396, + "objective/train/theoretical_loss": 3.47496749237705, + "objective/train/tokens_used": 1737503200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2279558926820755, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0477663278579712, + "objective/train/weighted_lm_loss": 3.261258602142334, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9518850445747375, + "theoretical_loss": 3.47496749237705, + "tokens_seen": 1717043200 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001952326901248581, + "loss": 3.1927, + "theoretical_loss": 3.474876365218357, + "tokens_seen": 1717567488 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019504351116155882, + "loss": 3.1399, + "theoretical_loss": 3.4746942176710633, + "tokens_seen": 1718616064 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019485433219825957, + "loss": 3.1422, + "theoretical_loss": 3.4745122123189294, + "tokens_seen": 1719664640 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.4768146872520447, + "objective/train/docs_used": 972105, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9560952186584473, + "objective/train/original_loss": 2.9560952186584473, + "objective/train/theoretical_loss": 3.4743985310945047, + "objective/train/tokens_used": 1740780000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2370544970035553, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488862991333008, + "objective/train/weighted_lm_loss": 3.099886178970337, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9523165822029114, + "theoretical_loss": 3.4743985310945047, + "tokens_seen": 1720320000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001946651532349603, + "loss": 3.1597, + "theoretical_loss": 3.4743303489643473, + "tokens_seen": 1720713216 + }, + { + "epoch": 0.61, + "learning_rate": 0.000194475974271661, + "loss": 3.2761, + "theoretical_loss": 3.474148627410102, + "tokens_seen": 1721761792 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001942867953083617, + "loss": 3.1427, + "theoretical_loss": 3.4739670474593742, + "tokens_seen": 1722810368 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.49066078662872314, + "objective/train/docs_used": 974104, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2695088386535645, + "objective/train/original_loss": 3.2695088386535645, + "objective/train/theoretical_loss": 3.473830955305458, + "objective/train/tokens_used": 1744056800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24312639236450195, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503020286560059, + "objective/train/weighted_lm_loss": 3.4335639476776123, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9519912004470825, + "theoretical_loss": 3.473830955305458, + "tokens_seen": 1723596800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019409761634506243, + "loss": 3.1443, + "theoretical_loss": 3.4737856089157355, + "tokens_seen": 1723858944 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019390843738176315, + "loss": 3.1869, + "theoretical_loss": 3.4736043115831507, + "tokens_seen": 1724907520 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001937192584184639, + "loss": 3.1776, + "theoretical_loss": 3.4734231552659747, + "tokens_seen": 1725956096 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.4791083335876465, + "objective/train/docs_used": 976014, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.98252534866333, + "objective/train/original_loss": 2.98252534866333, + "objective/train/theoretical_loss": 3.4732647590116423, + "objective/train/tokens_used": 1747333600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23720155656337738, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491164922714233, + "objective/train/weighted_lm_loss": 3.1297645568847656, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.9517265558242798, + "theoretical_loss": 3.4732647590116423, + "tokens_seen": 1726873600 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001935300794551646, + "loss": 3.1976, + "theoretical_loss": 3.473242139768953, + "tokens_seen": 1727004672 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001933409004918653, + "loss": 3.199, + "theoretical_loss": 3.4730612648972174, + "tokens_seen": 1728053248 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019315172152856603, + "loss": 3.2134, + "theoretical_loss": 3.4728805304562904, + "tokens_seen": 1729101824 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.48143187165260315, + "objective/train/docs_used": 978029, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.023312568664551, + "objective/train/original_loss": 3.02331280708313, + "objective/train/theoretical_loss": 3.472699936252079, + "objective/train/tokens_used": 1750610400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23814232647418976, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493534803390503, + "objective/train/weighted_lm_loss": 3.172309637069702, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9516406059265137, + "theoretical_loss": 3.472699936252079, + "tokens_seen": 1730150400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019296254256526675, + "loss": 3.1737, + "theoretical_loss": 3.472699936252079, + "tokens_seen": 1730150400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019277336360196744, + "loss": 3.1689, + "theoretical_loss": 3.4725194820908776, + "tokens_seen": 1731198976 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001925841846386682, + "loss": 3.1617, + "theoretical_loss": 3.4723391677793627, + "tokens_seen": 1732247552 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001923950056753689, + "loss": 3.2193, + "theoretical_loss": 3.472158993124598, + "tokens_seen": 1733296128 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.4837075471878052, + "objective/train/docs_used": 979550, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9834237098693848, + "objective/train/original_loss": 2.9834237098693848, + "objective/train/theoretical_loss": 3.4721364811027735, + "objective/train/tokens_used": 1753887200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23856787383556366, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495834350585938, + "objective/train/weighted_lm_loss": 3.1295249462127686, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.956782877445221, + "theoretical_loss": 3.4721364811027735, + "tokens_seen": 1733427200 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019220582671206963, + "loss": 3.2026, + "theoretical_loss": 3.471978957934027, + "tokens_seen": 1734344704 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019201664774877035, + "loss": 3.223, + "theoretical_loss": 3.471799062015476, + "tokens_seen": 1735393280 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019182746878547105, + "loss": 3.2048, + "theoretical_loss": 3.4716193051771524, + "tokens_seen": 1736441856 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.4862057566642761, + "objective/train/docs_used": 981478, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.999570608139038, + "objective/train/original_loss": 2.999570608139038, + "objective/train/theoretical_loss": 3.4715743876764176, + "objective/train/tokens_used": 1757164000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23953025043010712, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498380661010742, + "objective/train/weighted_lm_loss": 3.149038076400757, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9523438811302185, + "theoretical_loss": 3.4715743876764176, + "tokens_seen": 1736704000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019163828982217177, + "loss": 3.2133, + "theoretical_loss": 3.4714396872276425, + "tokens_seen": 1737490432 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001914491108588725, + "loss": 3.1317, + "theoretical_loss": 3.471260207975912, + "tokens_seen": 1738539008 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019125993189557323, + "loss": 3.1341, + "theoretical_loss": 3.471080867231304, + "tokens_seen": 1739587584 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.47021281719207764, + "objective/train/docs_used": 983277, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.848379373550415, + "objective/train/original_loss": 2.848379135131836, + "objective/train/theoretical_loss": 3.471013650122095, + "objective/train/tokens_used": 1760440800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22874988615512848, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481839179992676, + "objective/train/weighted_lm_loss": 2.9869155883789062, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9516876339912415, + "theoretical_loss": 3.471013650122095, + "tokens_seen": 1739980800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019107075293227393, + "loss": 3.1987, + "theoretical_loss": 3.470901664803538, + "tokens_seen": 1740636160 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019088157396897465, + "loss": 3.1512, + "theoretical_loss": 3.470722600502711, + "tokens_seen": 1741684736 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019069239500567537, + "loss": 3.1581, + "theoretical_loss": 3.470543674139293, + "tokens_seen": 1742733312 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.4912092387676239, + "objective/train/docs_used": 985168, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.71474027633667, + "objective/train/original_loss": 2.714740753173828, + "objective/train/theoretical_loss": 3.470454262624987, + "objective/train/tokens_used": 1763717600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24285683035850525, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503555536270142, + "objective/train/weighted_lm_loss": 2.8520116806030273, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9635713696479797, + "theoretical_loss": 3.470454262624987, + "tokens_seen": 1743257600 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001905032160423761, + "loss": 3.2016, + "theoretical_loss": 3.4703648855241283, + "tokens_seen": 1743781888 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019031403707907684, + "loss": 3.1749, + "theoretical_loss": 3.470186234468435, + "tokens_seen": 1744830464 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019012485811577753, + "loss": 3.1757, + "theoretical_loss": 3.4700077207838023, + "tokens_seen": 1745879040 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.4769324064254761, + "objective/train/docs_used": 987031, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9645779132843018, + "objective/train/original_loss": 2.9645776748657227, + "objective/train/theoretical_loss": 3.469896219406081, + "objective/train/tokens_used": 1766994400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2403060346841812, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489145517349243, + "objective/train/weighted_lm_loss": 3.1101176738739014, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9535593390464783, + "theoretical_loss": 3.469896219406081, + "tokens_seen": 1746534400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018993567915247825, + "loss": 3.2297, + "theoretical_loss": 3.4698293442821915, + "tokens_seen": 1746927616 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018974650018917897, + "loss": 3.1923, + "theoretical_loss": 3.4696511047759317, + "tokens_seen": 1747976192 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001895573212258797, + "loss": 3.2484, + "theoretical_loss": 3.4694730020777245, + "tokens_seen": 1749024768 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.48558348417282104, + "objective/train/docs_used": 989075, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1397294998168945, + "objective/train/original_loss": 3.1397294998168945, + "objective/train/theoretical_loss": 3.4693395147218875, + "objective/train/tokens_used": 1770271200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24000008404254913, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497783422470093, + "objective/train/weighted_lm_loss": 3.2957050800323486, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9543235301971436, + "theoretical_loss": 3.4693395147218875, + "tokens_seen": 1749811200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018936814226258039, + "loss": 3.1974, + "theoretical_loss": 3.4692950360006365, + "tokens_seen": 1750073344 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018917896329928113, + "loss": 3.2439, + "theoretical_loss": 3.469117206358103, + "tokens_seen": 1751121920 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018898978433598185, + "loss": 3.2455, + "theoretical_loss": 3.4689395129639253, + "tokens_seen": 1752170496 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.4787355363368988, + "objective/train/docs_used": 991265, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.098680257797241, + "objective/train/original_loss": 3.098680019378662, + "objective/train/theoretical_loss": 3.4687841428641515, + "objective/train/tokens_used": 1773548000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24327175319194794, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491098165512085, + "objective/train/weighted_lm_loss": 3.2512500286102295, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.951521098613739, + "theoretical_loss": 3.4687841428641515, + "tokens_seen": 1753088000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018880060537268257, + "loss": 3.1711, + "theoretical_loss": 3.468761955632271, + "tokens_seen": 1753219072 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018861142640938327, + "loss": 3.2173, + "theoretical_loss": 3.4685845341776704, + "tokens_seen": 1754267648 + }, + { + "epoch": 0.63, + "learning_rate": 0.000188422247446084, + "loss": 3.1868, + "theoretical_loss": 3.468407248415019, + "tokens_seen": 1755316224 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.4866624176502228, + "objective/train/docs_used": 993214, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2612428665161133, + "objective/train/original_loss": 3.2612428665161133, + "objective/train/theoretical_loss": 3.468230098159573, + "objective/train/tokens_used": 1776824800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2405889928340912, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498892068862915, + "objective/train/weighted_lm_loss": 3.423818826675415, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9539464116096497, + "theoretical_loss": 3.468230098159573, + "tokens_seen": 1756364800 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001882330684827847, + "loss": 3.1879, + "theoretical_loss": 3.468230098159573, + "tokens_seen": 1756364800 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018804388951948546, + "loss": 3.1916, + "theoretical_loss": 3.468053083226952, + "tokens_seen": 1757413376 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018785471055618618, + "loss": 3.169, + "theoretical_loss": 3.4678762034331347, + "tokens_seen": 1758461952 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018766553159288687, + "loss": 3.2222, + "theoretical_loss": 3.4676994585944616, + "tokens_seen": 1759510528 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.48814916610717773, + "objective/train/docs_used": 995165, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.884378433227539, + "objective/train/original_loss": 2.884378433227539, + "objective/train/theoretical_loss": 3.4676773749695275, + "objective/train/tokens_used": 1780101600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24147918820381165, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500423908233643, + "objective/train/weighted_lm_loss": 3.029207706451416, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9532400369644165, + "theoretical_loss": 3.4676773749695275, + "tokens_seen": 1759641600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001874763526295876, + "loss": 3.1404, + "theoretical_loss": 3.4675228485276297, + "tokens_seen": 1760559104 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001872871736662883, + "loss": 3.2087, + "theoretical_loss": 3.467346373049696, + "tokens_seen": 1761607680 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018709799470298903, + "loss": 3.1406, + "theoretical_loss": 3.467170031978074, + "tokens_seen": 1762656256 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.4851972758769989, + "objective/train/docs_used": 997036, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.950261116027832, + "objective/train/original_loss": 2.950261116027832, + "objective/train/theoretical_loss": 3.4671259676897908, + "objective/train/tokens_used": 1783378400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23831064999103546, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497310161590576, + "objective/train/weighted_lm_loss": 3.0977723598480225, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9909802675247192, + "theoretical_loss": 3.4671259676897908, + "tokens_seen": 1762918400 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018690881573968975, + "loss": 3.1547, + "theoretical_loss": 3.4669938251305314, + "tokens_seen": 1763704832 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018671963677639047, + "loss": 3.1313, + "theoretical_loss": 3.4668177523251944, + "tokens_seen": 1764753408 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001865304578130912, + "loss": 3.1446, + "theoretical_loss": 3.466641813380541, + "tokens_seen": 1765801984 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.47915053367614746, + "objective/train/docs_used": 998803, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.053201198577881, + "objective/train/original_loss": 3.0532007217407227, + "objective/train/theoretical_loss": 3.4665758707502654, + "objective/train/tokens_used": 1786655200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23946774005889893, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491321086883545, + "objective/train/weighted_lm_loss": 3.2024171352386475, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9516938328742981, + "theoretical_loss": 3.4665758707502654, + "tokens_seen": 1766195200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018634127884979191, + "loss": 3.1561, + "theoretical_loss": 3.466466008115404, + "tokens_seen": 1766850560 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001861520998864926, + "loss": 3.1794, + "theoretical_loss": 3.4662903363489677, + "tokens_seen": 1767899136 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018596292092319333, + "loss": 3.1129, + "theoretical_loss": 3.4661147979007687, + "tokens_seen": 1768947712 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.4781850278377533, + "objective/train/docs_used": 999986, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.99733304977417, + "objective/train/original_loss": 2.99733304977417, + "objective/train/theoretical_loss": 3.466027078614709, + "objective/train/tokens_used": 1789932000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2370835840702057, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490233898162842, + "objective/train/weighted_lm_loss": 3.1454968452453613, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9598371982574463, + "theoretical_loss": 3.466027078614709, + "tokens_seen": 1769472000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018577374195989408, + "loss": 3.1839, + "theoretical_loss": 3.4659393925906943, + "tokens_seen": 1769996288 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001855845629965948, + "loss": 3.1999, + "theoretical_loss": 3.4657641202389815, + "tokens_seen": 1771044864 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018539538403329552, + "loss": 3.177, + "theoretical_loss": 3.465588980666216, + "tokens_seen": 1772093440 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.4717888832092285, + "objective/train/docs_used": 1001522, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.009805917739868, + "objective/train/original_loss": 3.0098061561584473, + "objective/train/theoretical_loss": 3.465479585780467, + "objective/train/tokens_used": 1793208800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2324257791042328, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0483598709106445, + "objective/train/weighted_lm_loss": 3.155412435531616, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9513979554176331, + "theoretical_loss": 3.465479585780467, + "tokens_seen": 1772748800 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001852062050699962, + "loss": 3.1154, + "theoretical_loss": 3.4654139736933325, + "tokens_seen": 1773142016 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018501702610669693, + "loss": 3.1981, + "theoretical_loss": 3.4652390991416118, + "tokens_seen": 1774190592 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018482784714339765, + "loss": 3.1889, + "theoretical_loss": 3.4650643568326815, + "tokens_seen": 1775239168 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.4764856994152069, + "objective/train/docs_used": 1003498, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7835757732391357, + "objective/train/original_loss": 2.7835755348205566, + "objective/train/theoretical_loss": 3.4649333867782075, + "objective/train/tokens_used": 1796485600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2330160140991211, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048832893371582, + "objective/train/weighted_lm_loss": 2.920579671859741, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9544862508773804, + "theoretical_loss": 3.4649333867782075, + "tokens_seen": 1776025600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001846386681800984, + "loss": 3.1457, + "theoretical_loss": 3.464889746588515, + "tokens_seen": 1776287744 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001844494892167991, + "loss": 3.1293, + "theoretical_loss": 3.464715268231429, + "tokens_seen": 1777336320 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001842603102534998, + "loss": 3.1338, + "theoretical_loss": 3.464540921584086, + "tokens_seen": 1778384896 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.4912343919277191, + "objective/train/docs_used": 1005457, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6683099269866943, + "objective/train/original_loss": 2.6683099269866943, + "objective/train/theoretical_loss": 3.464388476171658, + "objective/train/tokens_used": 1799762400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24345652759075165, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503610372543335, + "objective/train/weighted_lm_loss": 2.8025920391082764, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9594215154647827, + "theoretical_loss": 3.464388476171658, + "tokens_seen": 1779302400 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018407113129020053, + "loss": 3.1607, + "theoretical_loss": 3.464366706469489, + "tokens_seen": 1779433472 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018388195232690125, + "loss": 3.0636, + "theoretical_loss": 3.4641926227109856, + "tokens_seen": 1780482048 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018369277336360197, + "loss": 3.0864, + "theoretical_loss": 3.464018670132262, + "tokens_seen": 1781530624 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.47258418798446655, + "objective/train/docs_used": 1007648, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7945456504821777, + "objective/train/original_loss": 2.7945456504821777, + "objective/train/theoretical_loss": 3.463844848557345, + "objective/train/tokens_used": 1803039200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23542162775993347, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484546422958374, + "objective/train/weighted_lm_loss": 2.9284746646881104, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9513460397720337, + "theoretical_loss": 3.463844848557345, + "tokens_seen": 1782579200 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001835035944003027, + "loss": 3.1216, + "theoretical_loss": 3.463844848557345, + "tokens_seen": 1782579200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018331441543700342, + "loss": 3.1505, + "theoretical_loss": 3.4636711578106034, + "tokens_seen": 1783627776 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018312523647370414, + "loss": 3.0778, + "theoretical_loss": 3.4634975977167413, + "tokens_seen": 1784676352 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018293605751040486, + "loss": 3.1852, + "theoretical_loss": 3.463324168100802, + "tokens_seen": 1785724928 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.4785902500152588, + "objective/train/docs_used": 1009512, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8947219848632812, + "objective/train/original_loss": 2.8947219848632812, + "objective/train/theoretical_loss": 3.463302498564338, + "objective/train/tokens_used": 1806316000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23424488306045532, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490496158599854, + "objective/train/weighted_lm_loss": 3.0360755920410156, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9536494016647339, + "theoretical_loss": 3.463302498564338, + "tokens_seen": 1785856000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018274687854710555, + "loss": 3.1466, + "theoretical_loss": 3.463150868788165, + "tokens_seen": 1786773504 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018255769958380627, + "loss": 3.1598, + "theoretical_loss": 3.4629776996045476, + "tokens_seen": 1787822080 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018236852062050702, + "loss": 3.1194, + "theoretical_loss": 3.462804660376, + "tokens_seen": 1788870656 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.48804527521133423, + "objective/train/docs_used": 1011320, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.961782217025757, + "objective/train/original_loss": 2.961782932281494, + "objective/train/theoretical_loss": 3.46276142085399, + "objective/train/tokens_used": 1809592800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2429705113172531, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050039529800415, + "objective/train/weighted_lm_loss": 3.109548807144165, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9592433571815491, + "theoretical_loss": 3.46276142085399, + "tokens_seen": 1789132800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018217934165720774, + "loss": 3.1391, + "theoretical_loss": 3.4626317509289075, + "tokens_seen": 1789919232 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018199016269390843, + "loss": 3.1193, + "theoretical_loss": 3.462458971089989, + "tokens_seen": 1790967808 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018180098373060915, + "loss": 3.1471, + "theoretical_loss": 3.462286320686297, + "tokens_seen": 1792016384 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.4847014844417572, + "objective/train/docs_used": 1013493, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9981818199157715, + "objective/train/original_loss": 2.9981818199157715, + "objective/train/theoretical_loss": 3.4622216101196894, + "objective/train/tokens_used": 1812869600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2426302582025528, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497032403945923, + "objective/train/weighted_lm_loss": 3.147491455078125, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.951403021812439, + "theoretical_loss": 3.4622216101196894, + "tokens_seen": 1792409600 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018161180476730987, + "loss": 3.1424, + "theoretical_loss": 3.4621137995452136, + "tokens_seen": 1793064960 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001814226258040106, + "loss": 3.0922, + "theoretical_loss": 3.4619414074944537, + "tokens_seen": 1794113536 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018123344684071134, + "loss": 3.1283, + "theoretical_loss": 3.4617691443620617, + "tokens_seen": 1795162112 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.4758584201335907, + "objective/train/docs_used": 1015450, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7228169441223145, + "objective/train/original_loss": 2.7228169441223145, + "objective/train/theoretical_loss": 3.4616830610866076, + "objective/train/tokens_used": 1816146400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23320035636425018, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487710237503052, + "objective/train/weighted_lm_loss": 2.856950283050537, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9522925615310669, + "theoretical_loss": 3.4616830610866076, + "tokens_seen": 1795686400 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018104426787741203, + "loss": 3.1451, + "theoretical_loss": 3.4615970099764115, + "tokens_seen": 1796210688 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018085508891411276, + "loss": 3.1009, + "theoretical_loss": 3.4614250041662054, + "tokens_seen": 1797259264 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018066590995081348, + "loss": 3.0992, + "theoretical_loss": 3.4612531267604734, + "tokens_seen": 1798307840 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.480893075466156, + "objective/train/docs_used": 1017085, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.92893385887146, + "objective/train/original_loss": 2.928933620452881, + "objective/train/theoretical_loss": 3.461145768511451, + "objective/train/tokens_used": 1819423200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2371511161327362, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492945909500122, + "objective/train/weighted_lm_loss": 3.072615623474121, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9612343907356262, + "theoretical_loss": 3.461145768511451, + "tokens_seen": 1798963200 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001804767309875142, + "loss": 3.131, + "theoretical_loss": 3.461081377588572, + "tokens_seen": 1799356416 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001802875520242149, + "loss": 3.0904, + "theoretical_loss": 3.4609097564801843, + "tokens_seen": 1800404992 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018009837306091564, + "loss": 3.0347, + "theoretical_loss": 3.460738263265319, + "tokens_seen": 1801453568 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.4862983524799347, + "objective/train/docs_used": 1018281, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.062450885772705, + "objective/train/original_loss": 3.062450647354126, + "objective/train/theoretical_loss": 3.4606097271822156, + "objective/train/tokens_used": 1822700000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24067695438861847, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498532056808472, + "objective/train/weighted_lm_loss": 3.2150371074676514, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9518213272094727, + "theoretical_loss": 3.4606097271822156, + "tokens_seen": 1802240000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017990919409761636, + "loss": 3.1488, + "theoretical_loss": 3.4605668977743074, + "tokens_seen": 1802502144 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017972001513431708, + "loss": 3.0774, + "theoretical_loss": 3.4603956598378067, + "tokens_seen": 1803550720 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001795308361710178, + "loss": 3.0499, + "theoretical_loss": 3.4602245492867962, + "tokens_seen": 1804599296 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.49369117617607117, + "objective/train/docs_used": 1020317, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0229692459106445, + "objective/train/original_loss": 3.0229697227478027, + "objective/train/theoretical_loss": 3.4600749319179434, + "objective/train/tokens_used": 1825976800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24504995346069336, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506149530410767, + "objective/train/weighted_lm_loss": 3.1760122776031494, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9538986086845398, + "theoretical_loss": 3.4600749319179434, + "tokens_seen": 1805516800 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001793416572077185, + "loss": 3.1218, + "theoretical_loss": 3.4600535659525757, + "tokens_seen": 1805647872 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001791524782444192, + "loss": 3.1079, + "theoretical_loss": 3.4598827096667684, + "tokens_seen": 1806696448 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017896329928111996, + "loss": 3.0629, + "theoretical_loss": 3.459711980261316, + "tokens_seen": 1807745024 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.47898003458976746, + "objective/train/docs_used": 1022235, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0741958618164062, + "objective/train/original_loss": 3.074195384979248, + "objective/train/theoretical_loss": 3.459541377568482, + "objective/train/tokens_used": 1829253600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23444890975952148, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490895509719849, + "objective/train/weighted_lm_loss": 3.2251052856445312, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9592746496200562, + "theoretical_loss": 3.459541377568482, + "tokens_seen": 1808793600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017877412031782068, + "loss": 3.043, + "theoretical_loss": 3.459541377568482, + "tokens_seen": 1808793600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017858494135452138, + "loss": 3.0361, + "theoretical_loss": 3.4593709014208462, + "tokens_seen": 1809842176 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001783957623912221, + "loss": 3.0718, + "theoretical_loss": 3.4592005516513087, + "tokens_seen": 1810890752 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017820658342792282, + "loss": 3.0793, + "theoretical_loss": 3.4590303280930854, + "tokens_seen": 1811939328 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.47567683458328247, + "objective/train/docs_used": 1024230, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0362322330474854, + "objective/train/original_loss": 3.0362324714660645, + "objective/train/theoretical_loss": 3.4590090590142464, + "objective/train/tokens_used": 1832530400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23685020208358765, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048771619796753, + "objective/train/weighted_lm_loss": 3.1860275268554688, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9578182697296143, + "theoretical_loss": 3.4590090590142464, + "tokens_seen": 1812070400 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017801740446462354, + "loss": 3.0886, + "theoretical_loss": 3.4588602305797096, + "tokens_seen": 1812987904 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017782822550132426, + "loss": 3.0809, + "theoretical_loss": 3.458690258945029, + "tokens_seen": 1814036480 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017763904653802498, + "loss": 3.1236, + "theoretical_loss": 3.458520413023207, + "tokens_seen": 1815085056 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.4894339442253113, + "objective/train/docs_used": 1026340, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.314352512359619, + "objective/train/original_loss": 3.31435227394104, + "objective/train/theoretical_loss": 3.4584779711659817, + "objective/train/tokens_used": 1835807200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24342581629753113, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501807928085327, + "objective/train/weighted_lm_loss": 3.4804110527038574, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9533761739730835, + "theoretical_loss": 3.4584779711659817, + "tokens_seen": 1815347200 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001774498675747257, + "loss": 3.1066, + "theoretical_loss": 3.458350692648722, + "tokens_seen": 1816133632 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017726068861142642, + "loss": 3.0877, + "theoretical_loss": 3.4581810976563645, + "tokens_seen": 1817182208 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017707150964812714, + "loss": 3.1435, + "theoretical_loss": 3.4580116278812376, + "tokens_seen": 1818230784 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.49165356159210205, + "objective/train/docs_used": 1027487, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.3668980598449707, + "objective/train/original_loss": 2.3668980598449707, + "objective/train/theoretical_loss": 3.4579481089645308, + "objective/train/tokens_used": 1839084000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24341939389705658, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504027605056763, + "objective/train/weighted_lm_loss": 2.486278533935547, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9554612040519714, + "theoretical_loss": 3.4579481089645308, + "tokens_seen": 1818624000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017688233068482783, + "loss": 3.155, + "theoretical_loss": 3.457842283158757, + "tokens_seen": 1819279360 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017669315172152858, + "loss": 3.179, + "theoretical_loss": 3.457673063324649, + "tokens_seen": 1820327936 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001765039727582293, + "loss": 3.1094, + "theoretical_loss": 3.4575039682149495, + "tokens_seen": 1821376512 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.4697323441505432, + "objective/train/docs_used": 1029511, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7002482414245605, + "objective/train/original_loss": 2.7002482414245605, + "objective/train/theoretical_loss": 3.457419467380599, + "objective/train/tokens_used": 1842360800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23787598311901093, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481818914413452, + "objective/train/weighted_lm_loss": 2.829710006713867, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9514086842536926, + "theoretical_loss": 3.457419467380599, + "tokens_seen": 1821900800 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017631479379493002, + "loss": 3.2005, + "theoretical_loss": 3.4573349976660053, + "tokens_seen": 1822425088 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017612561483163072, + "loss": 3.0605, + "theoretical_loss": 3.4571661515144703, + "tokens_seen": 1823473664 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017593643586833144, + "loss": 3.1137, + "theoretical_loss": 3.4569974295973083, + "tokens_seen": 1824522240 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.48513007164001465, + "objective/train/docs_used": 1031319, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.138819456100464, + "objective/train/original_loss": 3.1388192176818848, + "objective/train/theoretical_loss": 3.4568920414145294, + "objective/train/tokens_used": 1845637600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2404634803533554, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049735188484192, + "objective/train/weighted_lm_loss": 3.294522285461426, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9532184600830078, + "theoretical_loss": 3.4568920414145294, + "tokens_seen": 1825177600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017574725690503216, + "loss": 3.1527, + "theoretical_loss": 3.456828831751788, + "tokens_seen": 1825570816 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001755580779417329, + "loss": 3.0631, + "theoretical_loss": 3.4566603578154877, + "tokens_seen": 1826619392 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017536889897843362, + "loss": 3.1108, + "theoretical_loss": 3.456492007626288, + "tokens_seen": 1827667968 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.48329445719718933, + "objective/train/docs_used": 1033212, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.762449026107788, + "objective/train/original_loss": 2.762449264526367, + "objective/train/theoretical_loss": 3.4563658260960706, + "objective/train/tokens_used": 1848914400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23659563064575195, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495319366455078, + "objective/train/weighted_lm_loss": 2.900442123413086, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9571607708930969, + "theoretical_loss": 3.4563658260960706, + "tokens_seen": 1828454400 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017517972001513432, + "loss": 3.174, + "theoretical_loss": 3.456323781022376, + "tokens_seen": 1828716544 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017499054105183504, + "loss": 3.0913, + "theoretical_loss": 3.456155677842244, + "tokens_seen": 1829765120 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017480136208853576, + "loss": 3.086, + "theoretical_loss": 3.455987697924686, + "tokens_seen": 1830813696 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.49024006724357605, + "objective/train/docs_used": 1034888, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1704938411712646, + "objective/train/original_loss": 3.1704936027526855, + "objective/train/theoretical_loss": 3.455840816484155, + "objective/train/tokens_used": 1852191200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24271473288536072, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050257921218872, + "objective/train/weighted_lm_loss": 3.3301610946655273, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9584827423095703, + "theoretical_loss": 3.455840816484155, + "tokens_seen": 1831731200 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017461218312523648, + "loss": 3.114, + "theoretical_loss": 3.4558198411088004, + "tokens_seen": 1831862272 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001744230041619372, + "loss": 3.151, + "theoretical_loss": 3.4556521072339854, + "tokens_seen": 1832910848 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017423382519863792, + "loss": 3.1021, + "theoretical_loss": 3.455484496139943, + "tokens_seen": 1833959424 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.48320844769477844, + "objective/train/docs_used": 1036847, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1283445358276367, + "objective/train/original_loss": 3.1283445358276367, + "objective/train/theoretical_loss": 3.4553170076666744, + "objective/train/tokens_used": 1855468000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24146130681037903, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495481491088867, + "objective/train/weighted_lm_loss": 3.282777786254883, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9517518877983093, + "theoretical_loss": 3.4553170076666744, + "tokens_seen": 1835008000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017404464623533864, + "loss": 3.0562, + "theoretical_loss": 3.4553170076666744, + "tokens_seen": 1835008000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017385546727203936, + "loss": 3.0563, + "theoretical_loss": 3.4551496416544794, + "tokens_seen": 1836056576 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017366628830874006, + "loss": 3.1021, + "theoretical_loss": 3.4549823979439593, + "tokens_seen": 1837105152 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017347710934544078, + "loss": 3.1223, + "theoretical_loss": 3.454815276376012, + "tokens_seen": 1838153728 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.4860526919364929, + "objective/train/docs_used": 1038918, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.040283679962158, + "objective/train/original_loss": 3.040283679962158, + "objective/train/theoretical_loss": 3.454794394760256, + "objective/train/tokens_used": 1858744800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2394542098045349, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049822449684143, + "objective/train/weighted_lm_loss": 3.1913719177246094, + "objective/train/weights_max": 1.0512157678604126, + "objective/train/weights_min": 0.9641718864440918, + "theoretical_loss": 3.454794394760256, + "tokens_seen": 1838284800 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017328793038214152, + "loss": 3.1416, + "theoretical_loss": 3.4546482767918336, + "tokens_seen": 1839202304 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017309875141884224, + "loss": 3.1489, + "theoretical_loss": 3.4544813990329173, + "tokens_seen": 1840250880 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017290957245554296, + "loss": 3.0767, + "theoretical_loss": 3.4543146429410516, + "tokens_seen": 1841299456 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.4855334758758545, + "objective/train/docs_used": 1041004, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6250319480895996, + "objective/train/original_loss": 2.6250319480895996, + "objective/train/theoretical_loss": 3.4542729729100494, + "objective/train/tokens_used": 1862021600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2389741688966751, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497679710388184, + "objective/train/weighted_lm_loss": 2.7558908462524414, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9570268988609314, + "theoretical_loss": 3.4542729729100494, + "tokens_seen": 1841561600 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017272039349224366, + "loss": 3.0565, + "theoretical_loss": 3.4541480083583203, + "tokens_seen": 1842348032 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017253121452894438, + "loss": 3.1441, + "theoretical_loss": 3.453981495127104, + "tokens_seen": 1843396608 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001723420355656451, + "loss": 3.1785, + "theoretical_loss": 3.453815103090075, + "tokens_seen": 1844445184 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.4911976158618927, + "objective/train/docs_used": 1042471, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1483094692230225, + "objective/train/original_loss": 3.1483097076416016, + "objective/train/theoretical_loss": 3.4537527372895047, + "objective/train/tokens_used": 1865298400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24456915259361267, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503631830215454, + "objective/train/weighted_lm_loss": 3.3062546253204346, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9537386298179626, + "theoretical_loss": 3.4537527372895047, + "tokens_seen": 1844838400 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017215285660234585, + "loss": 3.1755, + "theoretical_loss": 3.453648832090199, + "tokens_seen": 1845493760 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017196367763904654, + "loss": 3.1402, + "theoretical_loss": 3.453482681970735, + "tokens_seen": 1846542336 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017177449867574726, + "loss": 3.1996, + "theoretical_loss": 3.453316652575235, + "tokens_seen": 1847590912 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.4777521789073944, + "objective/train/docs_used": 1043966, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8465492725372314, + "objective/train/original_loss": 2.8465495109558105, + "objective/train/theoretical_loss": 3.45323368310016, + "objective/train/tokens_used": 1868575200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2377437949180603, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489834547042847, + "objective/train/weighted_lm_loss": 2.9855000972747803, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.95146644115448, + "theoretical_loss": 3.45323368310016, + "tokens_seen": 1848115200 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017158531971244798, + "loss": 3.1897, + "theoretical_loss": 3.453150743747539, + "tokens_seen": 1848639488 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001713961407491487, + "loss": 3.1487, + "theoretical_loss": 3.4529849553317806, + "tokens_seen": 1849688064 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001712069617858494, + "loss": 3.1324, + "theoretical_loss": 3.4528192871723813, + "tokens_seen": 1850736640 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.49022161960601807, + "objective/train/docs_used": 1045702, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6801793575286865, + "objective/train/original_loss": 2.6801795959472656, + "objective/train/theoretical_loss": 3.452715805571427, + "objective/train/tokens_used": 1871852000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24245530366897583, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502547025680542, + "objective/train/weighted_lm_loss": 2.814542293548584, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9725965261459351, + "theoretical_loss": 3.452715805571427, + "tokens_seen": 1851392000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017101778282255014, + "loss": 3.1696, + "theoretical_loss": 3.4526537391140524, + "tokens_seen": 1851785216 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017082860385925086, + "loss": 3.226, + "theoretical_loss": 3.452488311001792, + "tokens_seen": 1852833792 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017063942489595158, + "loss": 3.1755, + "theoretical_loss": 3.452323002680888, + "tokens_seen": 1853882368 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.4886515140533447, + "objective/train/docs_used": 1047742, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.119795083999634, + "objective/train/original_loss": 3.119795322418213, + "objective/train/theoretical_loss": 3.4521990999603807, + "objective/train/tokens_used": 1875128800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24155665934085846, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500929355621338, + "objective/train/weighted_lm_loss": 3.276261806488037, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9518893957138062, + "theoretical_loss": 3.4521990999603807, + "tokens_seen": 1854668800 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001704502459326523, + "loss": 3.1549, + "theoretical_loss": 3.452157813996915, + "tokens_seen": 1854930944 + }, + { + "epoch": 0.66, + "learning_rate": 0.000170261066969353, + "loss": 3.1942, + "theoretical_loss": 3.4519927447957315, + "tokens_seen": 1855979520 + }, + { + "epoch": 0.66, + "learning_rate": 0.00017007188800605372, + "loss": 3.2162, + "theoretical_loss": 3.4518277949234837, + "tokens_seen": 1857028096 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.4771397113800049, + "objective/train/docs_used": 1049474, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.327524185180664, + "objective/train/original_loss": 3.327524185180664, + "objective/train/theoretical_loss": 3.451683561551551, + "objective/train/tokens_used": 1878405600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2344101220369339, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048905372619629, + "objective/train/weighted_lm_loss": 3.490372896194458, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9525474309921265, + "theoretical_loss": 3.451683561551551, + "tokens_seen": 1857945600 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016988270904275447, + "loss": 3.1831, + "theoretical_loss": 3.451662964226602, + "tokens_seen": 1858076672 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001696935300794552, + "loss": 3.249, + "theoretical_loss": 3.4514982525518008, + "tokens_seen": 1859125248 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016950435111615588, + "loss": 3.2283, + "theoretical_loss": 3.451333659746079, + "tokens_seen": 1860173824 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.48431384563446045, + "objective/train/docs_used": 1051410, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.301570177078247, + "objective/train/original_loss": 3.301570177078247, + "objective/train/theoretical_loss": 3.451169185656717, + "objective/train/tokens_used": 1881682400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23943281173706055, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049648404121399, + "objective/train/weighted_lm_loss": 3.4656476974487305, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9544273614883423, + "theoretical_loss": 3.451169185656717, + "tokens_seen": 1861222400 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001693151721528566, + "loss": 3.2445, + "theoretical_loss": 3.451169185656717, + "tokens_seen": 1861222400 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016912599318955732, + "loss": 3.0725, + "theoretical_loss": 3.451004830131277, + "tokens_seen": 1862270976 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016893681422625804, + "loss": 3.2268, + "theoretical_loss": 3.450840593017604, + "tokens_seen": 1863319552 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001687476352629588, + "loss": 3.2105, + "theoretical_loss": 3.4506764741638234, + "tokens_seen": 1864368128 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.49058830738067627, + "objective/train/docs_used": 1053575, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6096420288085938, + "objective/train/original_loss": 2.6096420288085938, + "objective/train/theoretical_loss": 3.450655967614696, + "objective/train/tokens_used": 1884959200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24194616079330444, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502886772155762, + "objective/train/weighted_lm_loss": 2.7413790225982666, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9557876586914062, + "theoretical_loss": 3.450655967614696, + "tokens_seen": 1864499200 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016855845629965948, + "loss": 3.1053, + "theoretical_loss": 3.45051247341834, + "tokens_seen": 1865416704 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001683692773363602, + "loss": 3.1242, + "theoretical_loss": 3.4503485906298383, + "tokens_seen": 1866465280 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016818009837306092, + "loss": 3.1477, + "theoretical_loss": 3.450184825647282, + "tokens_seen": 1867513856 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.4847777783870697, + "objective/train/docs_used": 1055519, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1753129959106445, + "objective/train/original_loss": 3.1753129959106445, + "objective/train/theoretical_loss": 3.4501439027911487, + "objective/train/tokens_used": 1888236000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24292968213558197, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497125387191772, + "objective/train/weighted_lm_loss": 3.332350730895996, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9515226483345032, + "theoretical_loss": 3.4501439027911487, + "tokens_seen": 1867776000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016799091940976164, + "loss": 3.1434, + "theoretical_loss": 3.450021178319912, + "tokens_seen": 1868562432 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016780174044646234, + "loss": 3.1115, + "theoretical_loss": 3.4498576484972476, + "tokens_seen": 1869611008 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016761256148316309, + "loss": 3.0889, + "theoretical_loss": 3.4496942360290843, + "tokens_seen": 1870659584 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.4783223867416382, + "objective/train/docs_used": 1057572, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.028226375579834, + "objective/train/original_loss": 3.028226137161255, + "objective/train/theoretical_loss": 3.4496329865783713, + "objective/train/tokens_used": 1891512800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23674528300762177, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490355491638184, + "objective/train/weighted_lm_loss": 3.176835060119629, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.951676070690155, + "theoretical_loss": 3.4496329865783713, + "tokens_seen": 1871052800 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001674233825198638, + "loss": 3.1617, + "theoretical_loss": 3.4495309407654937, + "tokens_seen": 1871708160 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016723420355656453, + "loss": 3.1119, + "theoretical_loss": 3.449367762556822, + "tokens_seen": 1872756736 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016704502459326522, + "loss": 3.0936, + "theoretical_loss": 3.449204701253692, + "tokens_seen": 1873805312 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.4878344237804413, + "objective/train/docs_used": 1059653, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6503984928131104, + "objective/train/original_loss": 2.6503987312316895, + "objective/train/theoretical_loss": 3.449123214395099, + "objective/train/tokens_used": 1894789600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24199466407299042, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500136613845825, + "objective/train/weighted_lm_loss": 2.782848358154297, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9590559005737305, + "theoretical_loss": 3.449123214395099, + "tokens_seen": 1874329600 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016685584562996594, + "loss": 3.1029, + "theoretical_loss": 3.4490417567069986, + "tokens_seen": 1874853888 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016666666666666666, + "loss": 3.096, + "theoretical_loss": 3.4488789287679116, + "tokens_seen": 1875902464 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016647748770336738, + "loss": 3.0593, + "theoretical_loss": 3.4487162172878723, + "tokens_seen": 1876951040 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.48306140303611755, + "objective/train/docs_used": 1061454, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.986534833908081, + "objective/train/original_loss": 2.986534595489502, + "objective/train/theoretical_loss": 3.4486145816863085, + "objective/train/tokens_used": 1898066400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2385236769914627, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495185852050781, + "objective/train/weighted_lm_loss": 3.1347832679748535, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9519363045692444, + "theoretical_loss": 3.4486145816863085, + "tokens_seen": 1877606400 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016628830874006813, + "loss": 3.0675, + "theoretical_loss": 3.4485536221185957, + "tokens_seen": 1877999616 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016609912977676882, + "loss": 3.0906, + "theoretical_loss": 3.448391143112067, + "tokens_seen": 1879048192 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016590995081346954, + "loss": 3.1067, + "theoretical_loss": 3.4482287801205422, + "tokens_seen": 1880096768 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.48760080337524414, + "objective/train/docs_used": 1063292, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.3369407653808594, + "objective/train/original_loss": 2.3369412422180176, + "objective/train/theoretical_loss": 3.448107083923021, + "objective/train/tokens_used": 1901343200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23860259354114532, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499727725982666, + "objective/train/weighted_lm_loss": 2.45414400100708, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9833489656448364, + "theoretical_loss": 3.448107083923021, + "tokens_seen": 1880883200 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016572077185017026, + "loss": 3.0392, + "theoretical_loss": 3.4480665329965485, + "tokens_seen": 1881145344 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016553159288687098, + "loss": 3.0593, + "theoretical_loss": 3.447904401592882, + "tokens_seen": 1882193920 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016534241392357168, + "loss": 3.0361, + "theoretical_loss": 3.4477423857626066, + "tokens_seen": 1883242496 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.48698893189430237, + "objective/train/docs_used": 1065366, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.880833148956299, + "objective/train/original_loss": 2.880833148956299, + "objective/train/theoretical_loss": 3.447600716602108, + "objective/train/tokens_used": 1904620000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23971621692180634, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499173402786255, + "objective/train/weighted_lm_loss": 3.0245883464813232, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9548348784446716, + "theoretical_loss": 3.447600716602108, + "tokens_seen": 1884160000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016515323496027243, + "loss": 3.1034, + "theoretical_loss": 3.4475804853590573, + "tokens_seen": 1884291072 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016496405599697315, + "loss": 3.019, + "theoretical_loss": 3.447418700235833, + "tokens_seen": 1885339648 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016477487703367387, + "loss": 3.0309, + "theoretical_loss": 3.4472570302468037, + "tokens_seen": 1886388224 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.4793676733970642, + "objective/train/docs_used": 1066806, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5835084915161133, + "objective/train/original_loss": 2.5835084915161133, + "objective/train/theoretical_loss": 3.447095475246102, + "objective/train/tokens_used": 1907896800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23561342060565948, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049134373664856, + "objective/train/weighted_lm_loss": 2.7097527980804443, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.96308833360672, + "theoretical_loss": 3.447095475246102, + "tokens_seen": 1887436800 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016458569807037456, + "loss": 3.0728, + "theoretical_loss": 3.447095475246102, + "tokens_seen": 1887436800 + }, + { + "epoch": 0.67, + "learning_rate": 0.00016439651910707528, + "loss": 3.0546, + "theoretical_loss": 3.446934035088128, + "tokens_seen": 1888485376 + }, + { + "epoch": 0.67, + "learning_rate": 0.000164207340143776, + "loss": 3.0043, + "theoretical_loss": 3.446772709627547, + "tokens_seen": 1889533952 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016401816118047675, + "loss": 3.0649, + "theoretical_loss": 3.4466114987192884, + "tokens_seen": 1890582528 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.49177709221839905, + "objective/train/docs_used": 1068619, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9133520126342773, + "objective/train/original_loss": 2.9133520126342773, + "objective/train/theoretical_loss": 3.446591355403001, + "objective/train/tokens_used": 1911173600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2440955489873886, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050418496131897, + "objective/train/weighted_lm_loss": 3.0605461597442627, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9515418410301208, + "theoretical_loss": 3.446591355403001, + "tokens_seen": 1890713600 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016382898221717747, + "loss": 3.0891, + "theoretical_loss": 3.446450402218545, + "tokens_seen": 1891631104 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016363980325387816, + "loss": 3.0613, + "theoretical_loss": 3.446289419980774, + "tokens_seen": 1892679680 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016345062429057888, + "loss": 3.1123, + "theoretical_loss": 3.446128551861694, + "tokens_seen": 1893728256 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.4786206781864166, + "objective/train/docs_used": 1071183, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7194037437438965, + "objective/train/original_loss": 2.7194037437438965, + "objective/train/theoretical_loss": 3.446088352646086, + "objective/train/tokens_used": 1914450400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23550763726234436, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490589141845703, + "objective/train/weighted_lm_loss": 2.853081464767456, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9536006450653076, + "theoretical_loss": 3.446088352646086, + "tokens_seen": 1893990400 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001632614453272796, + "loss": 3.0588, + "theoretical_loss": 3.4459677977172847, + "tokens_seen": 1894776832 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016307226636398032, + "loss": 3.0971, + "theoretical_loss": 3.4458071574037907, + "tokens_seen": 1895825408 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016288308740068104, + "loss": 3.1363, + "theoretical_loss": 3.4456466307777127, + "tokens_seen": 1896873984 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.4824937582015991, + "objective/train/docs_used": 1073215, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.948737621307373, + "objective/train/original_loss": 2.948737621307373, + "objective/train/theoretical_loss": 3.4455864625737282, + "objective/train/tokens_used": 1917727200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2392544150352478, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049465537071228, + "objective/train/weighted_lm_loss": 3.0943331718444824, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9522041082382202, + "theoretical_loss": 3.4455864625737282, + "tokens_seen": 1897267200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016269390843738177, + "loss": 3.1575, + "theoretical_loss": 3.4454862176958154, + "tokens_seen": 1897922560 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016250472947408249, + "loss": 3.07, + "theoretical_loss": 3.4453259180151203, + "tokens_seen": 1898971136 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001623155505107832, + "loss": 3.0711, + "theoretical_loss": 3.4451657315929096, + "tokens_seen": 1900019712 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.4815155863761902, + "objective/train/docs_used": 1074511, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.044644594192505, + "objective/train/original_loss": 3.044644832611084, + "objective/train/theoretical_loss": 3.445085680809207, + "objective/train/tokens_used": 1921004000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23717336356639862, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493569374084473, + "objective/train/weighted_lm_loss": 3.19500470161438, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9591754674911499, + "theoretical_loss": 3.445085680809207, + "tokens_seen": 1900544000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016212637154748393, + "loss": 3.0576, + "theoretical_loss": 3.445005658286722, + "tokens_seen": 1901068288 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016193719258418462, + "loss": 3.0648, + "theoretical_loss": 3.4448456979543556, + "tokens_seen": 1902116864 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016174801362088537, + "loss": 3.0426, + "theoretical_loss": 3.4446858504538644, + "tokens_seen": 1903165440 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.49052512645721436, + "objective/train/docs_used": 1076404, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9473235607147217, + "objective/train/original_loss": 2.94732403755188, + "objective/train/theoretical_loss": 3.4445860030005253, + "objective/train/tokens_used": 1924280800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24399477243423462, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502928495407104, + "objective/train/weighted_lm_loss": 3.0951266288757324, + "objective/train/weights_max": 1.0512195825576782, + "objective/train/weights_min": 0.9533544182777405, + "theoretical_loss": 3.4445860030005253, + "tokens_seen": 1903820800 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001615588346575861, + "loss": 3.0305, + "theoretical_loss": 3.4445261156435585, + "tokens_seen": 1904214016 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001613696556942868, + "loss": 3.016, + "theoretical_loss": 3.4443664933820055, + "tokens_seen": 1905262592 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001611804767309875, + "loss": 3.0832, + "theoretical_loss": 3.4442069835280265, + "tokens_seen": 1906311168 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.47672393918037415, + "objective/train/docs_used": 1078284, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2508022785186768, + "objective/train/original_loss": 3.2508018016815186, + "objective/train/theoretical_loss": 3.4440874248202293, + "objective/train/tokens_used": 1927557600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23937898874282837, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048888921737671, + "objective/train/weighted_lm_loss": 3.4074931144714355, + "objective/train/weights_max": 1.0512206554412842, + "objective/train/weights_min": 0.9522076845169067, + "theoretical_loss": 3.4440874248202293, + "tokens_seen": 1907097600 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016099129776768822, + "loss": 3.0102, + "theoretical_loss": 3.4440475859406985, + "tokens_seen": 1907359744 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016080211880438894, + "loss": 3.0034, + "theoretical_loss": 3.4438883004793515, + "tokens_seen": 1908408320 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001606129398410897, + "loss": 3.0316, + "theoretical_loss": 3.4437291270035697, + "tokens_seen": 1909456896 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.48424607515335083, + "objective/train/docs_used": 1079917, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7775046825408936, + "objective/train/original_loss": 2.7775044441223145, + "objective/train/theoretical_loss": 3.4435899419652256, + "objective/train/tokens_used": 1930834400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23730778694152832, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496307611465454, + "objective/train/weighted_lm_loss": 2.9170379638671875, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9515936970710754, + "theoretical_loss": 3.4435899419652256, + "tokens_seen": 1910374400 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016042376087779038, + "loss": 3.0128, + "theoretical_loss": 3.443570065373189, + "tokens_seen": 1910505472 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001602345819144911, + "loss": 3.0617, + "theoretical_loss": 3.443411115448299, + "tokens_seen": 1911554048 + }, + { + "epoch": 0.68, + "learning_rate": 0.00016004540295119183, + "loss": 3.0827, + "theoretical_loss": 3.4432522770892398, + "tokens_seen": 1912602624 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.48178908228874207, + "objective/train/docs_used": 1081633, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.645468235015869, + "objective/train/original_loss": 2.6454684734344482, + "objective/train/theoretical_loss": 3.443093550156604, + "objective/train/tokens_used": 1934111200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24107873439788818, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494041442871094, + "objective/train/weighted_lm_loss": 2.7751283645629883, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9514924883842468, + "theoretical_loss": 3.443093550156604, + "tokens_seen": 1913651200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00015985622398789255, + "loss": 3.0849, + "theoretical_loss": 3.443093550156604, + "tokens_seen": 1913651200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00015966704502459327, + "loss": 3.0829, + "theoretical_loss": 3.4429349345112326, + "tokens_seen": 1914699776 + }, + { + "epoch": 0.68, + "learning_rate": 0.000159477866061294, + "loss": 3.0307, + "theoretical_loss": 3.442776430014218, + "tokens_seen": 1915748352 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001592886870979947, + "loss": 3.1064, + "theoretical_loss": 3.442618036526901, + "tokens_seen": 1916796928 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.4814731180667877, + "objective/train/docs_used": 1083428, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.869269609451294, + "objective/train/original_loss": 2.869269847869873, + "objective/train/theoretical_loss": 3.44259824513946, + "objective/train/tokens_used": 1937388000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24218010902404785, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493781566619873, + "objective/train/weighted_lm_loss": 3.0097241401672363, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9519504904747009, + "theoretical_loss": 3.44259824513946, + "tokens_seen": 1916928000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015909950813469543, + "loss": 3.1398, + "theoretical_loss": 3.4424597539108728, + "tokens_seen": 1917845504 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015891032917139615, + "loss": 3.1185, + "theoretical_loss": 3.4423015820279703, + "tokens_seen": 1918894080 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015872115020809684, + "loss": 3.0341, + "theoretical_loss": 3.4421435207402795, + "tokens_seen": 1919942656 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.48610636591911316, + "objective/train/docs_used": 1085055, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.043754816055298, + "objective/train/original_loss": 3.043754816055298, + "objective/train/theoretical_loss": 3.44210402268272, + "objective/train/tokens_used": 1940664800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23820815980434418, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049821376800537, + "objective/train/weighted_lm_loss": 3.1965363025665283, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9668196439743042, + "theoretical_loss": 3.44210402268272, + "tokens_seen": 1920204800 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015853197124479756, + "loss": 3.149, + "theoretical_loss": 3.441985569910133, + "tokens_seen": 1920991232 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001583427922814983, + "loss": 3.1465, + "theoretical_loss": 3.44182772940011, + "tokens_seen": 1922039808 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015815361331819903, + "loss": 3.1075, + "theoretical_loss": 3.441669999073035, + "tokens_seen": 1923088384 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.4901277422904968, + "objective/train/docs_used": 1086767, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.144951343536377, + "objective/train/original_loss": 3.144951343536377, + "objective/train/theoretical_loss": 3.4416108785789676, + "objective/train/tokens_used": 1943941600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24380195140838623, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502521991729736, + "objective/train/weighted_lm_loss": 3.3028461933135986, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9523271918296814, + "theoretical_loss": 3.4416108785789676, + "tokens_seen": 1923481600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015796443435489975, + "loss": 3.1408, + "theoretical_loss": 3.441512378791978, + "tokens_seen": 1924136960 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015777525539160045, + "loss": 3.1612, + "theoretical_loss": 3.4413548684202544, + "tokens_seen": 1925185536 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015758607642830117, + "loss": 3.1644, + "theoretical_loss": 3.4411974678214223, + "tokens_seen": 1926234112 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.48439183831214905, + "objective/train/docs_used": 1088753, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6918206214904785, + "objective/train/original_loss": 2.6918208599090576, + "objective/train/theoretical_loss": 3.4411188086442692, + "objective/train/tokens_used": 1947218400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23981790244579315, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049658179283142, + "objective/train/weighted_lm_loss": 2.8249881267547607, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9570189118385315, + "theoretical_loss": 3.4411188086442692, + "tokens_seen": 1926758400 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001573968974650019, + "loss": 3.162, + "theoretical_loss": 3.4410401768592846, + "tokens_seen": 1927282688 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015720771850170263, + "loss": 3.1948, + "theoretical_loss": 3.440882995397886, + "tokens_seen": 1928331264 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015701853953840333, + "loss": 3.2031, + "theoretical_loss": 3.440725923301515, + "tokens_seen": 1929379840 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.47249332070350647, + "objective/train/docs_used": 1090637, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.311936855316162, + "objective/train/original_loss": 3.311936855316162, + "objective/train/theoretical_loss": 3.4406278087180064, + "objective/train/tokens_used": 1950495200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24297377467155457, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484838485717773, + "objective/train/weighted_lm_loss": 3.4713997840881348, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9513853192329407, + "theoretical_loss": 3.4406278087180064, + "tokens_seen": 1930035200 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015682936057510405, + "loss": 3.1742, + "theoretical_loss": 3.4405689604347005, + "tokens_seen": 1930428416 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015664018161180477, + "loss": 3.154, + "theoretical_loss": 3.4404121066622144, + "tokens_seen": 1931476992 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001564510026485055, + "loss": 3.1313, + "theoretical_loss": 3.4402553618490668, + "tokens_seen": 1932525568 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.4777022898197174, + "objective/train/docs_used": 1092665, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2258169651031494, + "objective/train/original_loss": 3.2258167266845703, + "objective/train/theoretical_loss": 3.440137874662704, + "objective/train/tokens_used": 1953772000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23566052317619324, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048967957496643, + "objective/train/weighted_lm_loss": 3.3833398818969727, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9548637866973877, + "theoretical_loss": 3.440137874662704, + "tokens_seen": 1933312000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015626182368520618, + "loss": 3.1434, + "theoretical_loss": 3.4400987258605102, + "tokens_seen": 1933574144 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015607264472190693, + "loss": 3.1228, + "theoretical_loss": 3.4399421985620364, + "tokens_seen": 1934622720 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015588346575860765, + "loss": 3.109, + "theoretical_loss": 3.439785779819375, + "tokens_seen": 1935671296 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.48318758606910706, + "objective/train/docs_used": 1094661, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.008556842803955, + "objective/train/original_loss": 3.0085573196411133, + "objective/train/theoretical_loss": 3.439649002363864, + "objective/train/tokens_used": 1957048800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24022220075130463, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495398044586182, + "objective/train/weighted_lm_loss": 3.1570565700531006, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9545727372169495, + "theoretical_loss": 3.439649002363864, + "tokens_seen": 1936588800 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015569428679530837, + "loss": 3.1473, + "theoretical_loss": 3.4396294694984952, + "tokens_seen": 1936719872 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001555051078320091, + "loss": 3.1707, + "theoretical_loss": 3.439473267465604, + "tokens_seen": 1937768448 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015531592886870979, + "loss": 3.0851, + "theoretical_loss": 3.4393171735871446, + "tokens_seen": 1938817024 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.4772590696811676, + "objective/train/docs_used": 1096594, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8366477489471436, + "objective/train/original_loss": 2.8366475105285645, + "objective/train/theoretical_loss": 3.439161187729799, + "objective/train/tokens_used": 1960325600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23779280483722687, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489344596862793, + "objective/train/weighted_lm_loss": 2.9748737812042236, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9515256285667419, + "theoretical_loss": 3.439161187729799, + "tokens_seen": 1939865600 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001551267499054105, + "loss": 3.0804, + "theoretical_loss": 3.439161187729799, + "tokens_seen": 1939865600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015493757094211125, + "loss": 3.1544, + "theoretical_loss": 3.4390053097604847, + "tokens_seen": 1940914176 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015474839197881197, + "loss": 3.1821, + "theoretical_loss": 3.4388495395463536, + "tokens_seen": 1941962752 + }, + { + "epoch": 0.69, + "learning_rate": 0.00015455921301551267, + "loss": 3.1627, + "theoretical_loss": 3.438693876954795, + "tokens_seen": 1943011328 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.48600539565086365, + "objective/train/docs_used": 1098399, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.878577709197998, + "objective/train/original_loss": 2.8785781860351562, + "objective/train/theoretical_loss": 3.438674426691467, + "objective/train/tokens_used": 1963602400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24080105125904083, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498244762420654, + "objective/train/weighted_lm_loss": 3.0219502449035645, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.952756404876709, + "theoretical_loss": 3.438674426691467, + "tokens_seen": 1943142400 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001543700340522134, + "loss": 3.0941, + "theoretical_loss": 3.4385383218534313, + "tokens_seen": 1944059904 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001541808550889141, + "loss": 3.0426, + "theoretical_loss": 3.43838287411012, + "tokens_seen": 1945108480 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015399167612561483, + "loss": 3.1071, + "theoretical_loss": 3.4382275335929506, + "tokens_seen": 1946157056 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.4673837423324585, + "objective/train/docs_used": 1099983, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.590907573699951, + "objective/train/original_loss": 2.590907573699951, + "objective/train/theoretical_loss": 3.4381887152023074, + "objective/train/tokens_used": 1966879200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23693355917930603, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0479426383972168, + "objective/train/weighted_lm_loss": 2.719980478286743, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.969889223575592, + "theoretical_loss": 3.4381887152023074, + "tokens_seen": 1946419200 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015380249716231555, + "loss": 3.1018, + "theoretical_loss": 3.4380723001702482, + "tokens_seen": 1947205632 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015361331819901627, + "loss": 3.1348, + "theoretical_loss": 3.4379171737105683, + "tokens_seen": 1948254208 + }, + { + "epoch": 0.7, + "learning_rate": 0.000153424139235717, + "loss": 3.0988, + "theoretical_loss": 3.4377621540826984, + "tokens_seen": 1949302784 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.4762834310531616, + "objective/train/docs_used": 1101732, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0439748764038086, + "objective/train/original_loss": 3.0439748764038086, + "objective/train/theoretical_loss": 3.4377040492380826, + "objective/train/tokens_used": 1970156000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2366829365491867, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488312244415283, + "objective/train/weighted_lm_loss": 3.1925623416900635, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9599282741546631, + "theoretical_loss": 3.4377040492380826, + "tokens_seen": 1949696000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001532349602724177, + "loss": 3.1246, + "theoretical_loss": 3.437607241155659, + "tokens_seen": 1950351360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015304578130911843, + "loss": 3.1243, + "theoretical_loss": 3.4374524347986997, + "tokens_seen": 1951399936 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015285660234581913, + "loss": 3.0839, + "theoretical_loss": 3.437297734881301, + "tokens_seen": 1952448512 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.4811760485172272, + "objective/train/docs_used": 1103876, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.098599910736084, + "objective/train/original_loss": 3.098599433898926, + "objective/train/theoretical_loss": 3.4372204247967133, + "objective/train/tokens_used": 1973432800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24242191016674042, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493496656417847, + "objective/train/weighted_lm_loss": 3.2506155967712402, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.951482892036438, + "theoretical_loss": 3.4372204247967133, + "tokens_seen": 1952972800 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015266742338251987, + "loss": 3.0771, + "theoretical_loss": 3.437143141273175, + "tokens_seen": 1953497088 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001524782444192206, + "loss": 3.1538, + "theoretical_loss": 3.4369886538442604, + "tokens_seen": 1954545664 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015228906545592131, + "loss": 3.1184, + "theoretical_loss": 3.4368342724647265, + "tokens_seen": 1955594240 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.4940529763698578, + "objective/train/docs_used": 1105993, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3750038146972656, + "objective/train/original_loss": 3.3750038146972656, + "objective/train/theoretical_loss": 3.436737837898122, + "objective/train/tokens_used": 1976709600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2451893538236618, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506517887115479, + "objective/train/weighted_lm_loss": 3.5460386276245117, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9603523015975952, + "theoretical_loss": 3.436737837898122, + "tokens_seen": 1956249600 + }, + { + "epoch": 0.7, + "learning_rate": 0.000152099886492622, + "loss": 3.1778, + "theoretical_loss": 3.43667999700497, + "tokens_seen": 1956642816 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015191070752932273, + "loss": 3.1804, + "theoretical_loss": 3.4365258273356156, + "tokens_seen": 1957691392 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015172152856602345, + "loss": 3.1596, + "theoretical_loss": 3.4363717633275153, + "tokens_seen": 1958739968 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.4944164454936981, + "objective/train/docs_used": 1108290, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2766504287719727, + "objective/train/original_loss": 3.2766504287719727, + "objective/train/theoretical_loss": 3.4362562845840747, + "objective/train/tokens_used": 1979986400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2452101707458496, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506881475448608, + "objective/train/weighted_lm_loss": 3.4432315826416016, + "objective/train/weights_max": 1.0512208938598633, + "objective/train/weights_min": 0.9669325351715088, + "theoretical_loss": 3.4362562845840747, + "tokens_seen": 1959526400 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001515323496027242, + "loss": 3.1575, + "theoretical_loss": 3.436217804851747, + "tokens_seen": 1959788544 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015134317063942492, + "loss": 3.1834, + "theoretical_loss": 3.4360639517796168, + "tokens_seen": 1960837120 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001511539916761256, + "loss": 3.1164, + "theoretical_loss": 3.4359102039826546, + "tokens_seen": 1961885696 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.49615535140037537, + "objective/train/docs_used": 1110185, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1375579833984375, + "objective/train/original_loss": 3.1375579833984375, + "objective/train/theoretical_loss": 3.435775760918026, + "objective/train/tokens_used": 1983263200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24696744978427887, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0508711338043213, + "objective/train/weighted_lm_loss": 3.2971224784851074, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.957501232624054, + "theoretical_loss": 3.435775760918026, + "tokens_seen": 1962803200 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015096481271282633, + "loss": 3.1439, + "theoretical_loss": 3.4357565613326155, + "tokens_seen": 1962934272 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015077563374952705, + "loss": 3.0854, + "theoretical_loss": 3.4356030237014803, + "tokens_seen": 1963982848 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015058645478622777, + "loss": 3.1707, + "theoretical_loss": 3.435449590961453, + "tokens_seen": 1965031424 + }, + { + "debugging/Self-BLEU-5": 0.4796244782337672, + "debugging/distinct-1-grams": 0.7874466763773558, + "debugging/distinct-2-grams": 0.9537592626603371, + "debugging/entropy-1-grams": 6.225892551400493, + "debugging/entropy-2-grams": 7.143796200176747, + "debugging/length": 450.05, + "debugging/num_segments": 20, + "debugging/raw_token_scores_avg": 0.028914527967572212, + "debugging/raw_token_scores_std": 0.09425558149814606, + "epoch": 0.7, + "objective/train/advantage_avg": 0.47108298540115356, + "objective/train/docs_used": 1112341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.987183094024658, + "objective/train/original_loss": 2.987183094024658, + "objective/train/theoretical_loss": 3.435296262984961, + "objective/train/tokens_used": 1986540000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23080673813819885, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0482814311981201, + "objective/train/weighted_lm_loss": 3.131026029586792, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9537268877029419, + "theoretical_loss": 3.435296262984961, + "tokens_seen": 1966080000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001503972758229285, + "loss": 3.1423, + "theoretical_loss": 3.435296262984961, + "tokens_seen": 1966080000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001502080968596292, + "loss": 3.0294, + "theoretical_loss": 3.4351430396446565, + "tokens_seen": 1967128576 + }, + { + "epoch": 0.7, + "learning_rate": 0.00015001891789632993, + "loss": 3.1178, + "theoretical_loss": 3.4349899208134125, + "tokens_seen": 1968177152 + }, + { + "epoch": 0.7, + "learning_rate": 0.00014982973893303065, + "loss": 3.1248, + "theoretical_loss": 3.4348369063643247, + "tokens_seen": 1969225728 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.4847583472728729, + "objective/train/docs_used": 1114494, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9856464862823486, + "objective/train/original_loss": 2.985647201538086, + "objective/train/theoretical_loss": 3.434817786891247, + "objective/train/tokens_used": 1989816800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24014219641685486, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049696683883667, + "objective/train/weighted_lm_loss": 3.1322898864746094, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9546946883201599, + "theoretical_loss": 3.434817786891247, + "tokens_seen": 1969356800 + }, + { + "epoch": 0.7, + "learning_rate": 0.00014964055996973135, + "loss": 3.0907, + "theoretical_loss": 3.4346839961707096, + "tokens_seen": 1970274304 + }, + { + "epoch": 0.7, + "learning_rate": 0.00014945138100643207, + "loss": 3.1122, + "theoretical_loss": 3.4345311901061066, + "tokens_seen": 1971322880 + }, + { + "epoch": 0.7, + "learning_rate": 0.00014926220204313282, + "loss": 3.0943, + "theoretical_loss": 3.434378488044274, + "tokens_seen": 1972371456 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.49263060092926025, + "objective/train/docs_used": 1115816, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0610079765319824, + "objective/train/original_loss": 3.0610079765319824, + "objective/train/theoretical_loss": 3.434340328764476, + "objective/train/tokens_used": 1993093600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24445389211177826, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505057573318481, + "objective/train/weighted_lm_loss": 3.2152202129364014, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9796652793884277, + "theoretical_loss": 3.434340328764476, + "tokens_seen": 1972633600 + }, + { + "epoch": 0.7, + "learning_rate": 0.00014907302307983354, + "loss": 3.1552, + "theoretical_loss": 3.4342258898591904, + "tokens_seen": 1973420032 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014888384411653426, + "loss": 3.1141, + "theoretical_loss": 3.434073395425055, + "tokens_seen": 1974468608 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014869466515323495, + "loss": 3.0634, + "theoretical_loss": 3.433921004616285, + "tokens_seen": 1975517184 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.47501012682914734, + "objective/train/docs_used": 1117825, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.095318078994751, + "objective/train/original_loss": 3.095317840576172, + "objective/train/theoretical_loss": 3.433863884753317, + "objective/train/tokens_used": 1996370400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23647457361221313, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487028360366821, + "objective/train/weighted_lm_loss": 3.245806932449341, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9513912200927734, + "theoretical_loss": 3.433863884753317, + "tokens_seen": 1975910400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014850548618993567, + "loss": 3.0918, + "theoretical_loss": 3.433768717307516, + "tokens_seen": 1976565760 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001483163072266364, + "loss": 3.1025, + "theoretical_loss": 3.4336165333736037, + "tokens_seen": 1977614336 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014812712826333714, + "loss": 3.0333, + "theoretical_loss": 3.433464452689618, + "tokens_seen": 1978662912 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.48081904649734497, + "objective/train/docs_used": 1119491, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.902231454849243, + "objective/train/original_loss": 2.902231216430664, + "objective/train/theoretical_loss": 3.433388451027367, + "objective/train/tokens_used": 1999647200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23978132009506226, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493004322052002, + "objective/train/weighted_lm_loss": 3.0465242862701416, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9542652368545532, + "theoretical_loss": 3.433388451027367, + "tokens_seen": 1979187200 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014793794930003783, + "loss": 3.0627, + "theoretical_loss": 3.4333124751308484, + "tokens_seen": 1979711488 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014774877033673855, + "loss": 3.0599, + "theoretical_loss": 3.4331606005728004, + "tokens_seen": 1980760064 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014755959137343927, + "loss": 3.0972, + "theoretical_loss": 3.4330088288911953, + "tokens_seen": 1981808640 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.47215214371681213, + "objective/train/docs_used": 1121511, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.187962055206299, + "objective/train/original_loss": 3.1879618167877197, + "objective/train/theoretical_loss": 3.432914023777001, + "objective/train/tokens_used": 2002924000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23091299831867218, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0483888387680054, + "objective/train/weighted_lm_loss": 3.343453884124756, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9539908170700073, + "theoretical_loss": 3.432914023777001, + "tokens_seen": 1982464000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014737041241014, + "loss": 3.0673, + "theoretical_loss": 3.43285715996197, + "tokens_seen": 1982857216 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001471812334468407, + "loss": 3.0675, + "theoretical_loss": 3.432705593661278, + "tokens_seen": 1983905792 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014699205448354144, + "loss": 3.0601, + "theoretical_loss": 3.4325541298654842, + "tokens_seen": 1984954368 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.4908943474292755, + "objective/train/docs_used": 1123355, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.127197504043579, + "objective/train/original_loss": 3.127197265625, + "objective/train/theoretical_loss": 3.4324405992132268, + "objective/train/tokens_used": 2006200800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24222372472286224, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503206253051758, + "objective/train/weighted_lm_loss": 3.284457206726074, + "objective/train/weights_max": 1.0512199401855469, + "objective/train/weights_min": 0.9885050654411316, + "theoretical_loss": 3.4324405992132268, + "tokens_seen": 1985740800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014680287552024216, + "loss": 3.1086, + "theoretical_loss": 3.432402768451171, + "tokens_seen": 1986002944 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014661369655694288, + "loss": 3.1612, + "theoretical_loss": 3.4322515092951322, + "tokens_seen": 1987051520 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001464245175936436, + "loss": 3.044, + "theoretical_loss": 3.432100352274376, + "tokens_seen": 1988100096 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.4853982627391815, + "objective/train/docs_used": 1125335, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.052032232284546, + "objective/train/original_loss": 3.052031993865967, + "objective/train/theoretical_loss": 3.4319681735675376, + "objective/train/tokens_used": 2009477600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24046699702739716, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04976224899292, + "objective/train/weighted_lm_loss": 3.203836679458618, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9524861574172974, + "theoretical_loss": 3.4319681735675376, + "tokens_seen": 1989017600 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001462353386303443, + "loss": 3.1721, + "theoretical_loss": 3.431949297266123, + "tokens_seen": 1989148672 + }, + { + "epoch": 0.71, + "learning_rate": 0.000146046159667045, + "loss": 3.1841, + "theoretical_loss": 3.4317983441478064, + "tokens_seen": 1990197248 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014585698070374576, + "loss": 3.1459, + "theoretical_loss": 3.4316474927970697, + "tokens_seen": 1991245824 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.4899314045906067, + "objective/train/docs_used": 1126843, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9000444412231445, + "objective/train/original_loss": 2.9000439643859863, + "objective/train/theoretical_loss": 3.431496743091769, + "objective/train/tokens_used": 2012754400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24320422112941742, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502294301986694, + "objective/train/weighted_lm_loss": 3.0460522174835205, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9620993733406067, + "theoretical_loss": 3.431496743091769, + "tokens_seen": 1992294400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014566780174044648, + "loss": 3.1331, + "theoretical_loss": 3.431496743091769, + "tokens_seen": 1992294400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014547862277714717, + "loss": 3.1277, + "theoretical_loss": 3.431346094909971, + "tokens_seen": 1993342976 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001452894438138479, + "loss": 3.1989, + "theoretical_loss": 3.4311955481299528, + "tokens_seen": 1994391552 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014510026485054861, + "loss": 3.205, + "theoretical_loss": 3.4310451026302005, + "tokens_seen": 1995440128 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.48517540097236633, + "objective/train/docs_used": 1128158, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8031439781188965, + "objective/train/original_loss": 2.8031439781188965, + "objective/train/theoretical_loss": 3.4310263040579567, + "objective/train/tokens_used": 2016031200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23921027779579163, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497334003448486, + "objective/train/weighted_lm_loss": 2.9433789253234863, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.955235481262207, + "theoretical_loss": 3.4310263040579567, + "tokens_seen": 1995571200 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014491108588724933, + "loss": 3.1294, + "theoretical_loss": 3.430894758289411, + "tokens_seen": 1996488704 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014472190692395008, + "loss": 3.2061, + "theoretical_loss": 3.4307445149864884, + "tokens_seen": 1997537280 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014453272796065078, + "loss": 3.1406, + "theoretical_loss": 3.430594372600546, + "tokens_seen": 1998585856 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.49511581659317017, + "objective/train/docs_used": 1130081, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1099729537963867, + "objective/train/original_loss": 3.109973192214966, + "objective/train/theoretical_loss": 3.430556852758192, + "objective/train/tokens_used": 2019308000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2468186765909195, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0507664680480957, + "objective/train/weighted_lm_loss": 3.267641544342041, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9615699052810669, + "theoretical_loss": 3.430556852758192, + "tokens_seen": 1998848000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001443435489973515, + "loss": 3.1497, + "theoretical_loss": 3.4304443310109063, + "tokens_seen": 1999634432 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014415437003405222, + "loss": 3.092, + "theoretical_loss": 3.430294390097097, + "tokens_seen": 2000683008 + }, + { + "epoch": 0.71, + "learning_rate": 0.00014396519107075294, + "loss": 3.1712, + "theoretical_loss": 3.4301445497388547, + "tokens_seen": 2001731584 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.4925604462623596, + "objective/train/docs_used": 1132167, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9438576698303223, + "objective/train/original_loss": 2.943857431411743, + "objective/train/theoretical_loss": 3.4300883855044857, + "objective/train/tokens_used": 2022584800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2444770783185959, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504989624023438, + "objective/train/weighted_lm_loss": 3.092851161956787, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9527947306632996, + "theoretical_loss": 3.4300883855044857, + "tokens_seen": 2002124800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014377601210745363, + "loss": 3.1484, + "theoretical_loss": 3.4299948098161215, + "tokens_seen": 2002780160 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014358683314415438, + "loss": 3.132, + "theoretical_loss": 3.4298451702090462, + "tokens_seen": 2003828736 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001433976541808551, + "loss": 3.1449, + "theoretical_loss": 3.4296956307979825, + "tokens_seen": 2004877312 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.47776997089385986, + "objective/train/docs_used": 1133291, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9905569553375244, + "objective/train/original_loss": 2.990556478500366, + "objective/train/theoretical_loss": 3.429620898628622, + "objective/train/tokens_used": 2025861600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2385081797838211, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048988938331604, + "objective/train/weighted_lm_loss": 3.1350185871124268, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9514456391334534, + "theoretical_loss": 3.429620898628622, + "tokens_seen": 2005401600 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014320847521755582, + "loss": 3.1673, + "theoretical_loss": 3.42954619146349, + "tokens_seen": 2005925888 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001430192962542565, + "loss": 3.1407, + "theoretical_loss": 3.4293968520863336, + "tokens_seen": 2006974464 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014283011729095723, + "loss": 3.1269, + "theoretical_loss": 3.4292476125474805, + "tokens_seen": 2008023040 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.4895833432674408, + "objective/train/docs_used": 1134873, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.12807035446167, + "objective/train/original_loss": 3.128070831298828, + "objective/train/theoretical_loss": 3.4291543884820275, + "objective/train/tokens_used": 2029138400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2412732094526291, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501848459243774, + "objective/train/weighted_lm_loss": 3.2846426963806152, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9555214643478394, + "theoretical_loss": 3.4291543884820275, + "tokens_seen": 2008678400 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014264093832765795, + "loss": 3.1118, + "theoretical_loss": 3.429098472728103, + "tokens_seen": 2009071616 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001424517593643587, + "loss": 3.0736, + "theoretical_loss": 3.4289494325095773, + "tokens_seen": 2010120192 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014226258040105942, + "loss": 3.1048, + "theoretical_loss": 3.4288004917734813, + "tokens_seen": 2011168768 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.45610880851745605, + "objective/train/docs_used": 1136813, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.221322774887085, + "objective/train/original_loss": 3.221322536468506, + "objective/train/theoretical_loss": 3.42868885143563, + "objective/train/tokens_used": 2032415200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23579107224941254, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0468087196350098, + "objective/train/weighted_lm_loss": 3.370765447616577, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.953737735748291, + "theoretical_loss": 3.42868885143563, + "tokens_seen": 2011955200 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014207340143776012, + "loss": 3.1595, + "theoretical_loss": 3.428651650401596, + "tokens_seen": 2012217344 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014188422247446084, + "loss": 3.1331, + "theoretical_loss": 3.4285029082759046, + "tokens_seen": 2013265920 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014169504351116156, + "loss": 3.1415, + "theoretical_loss": 3.4283542652785908, + "tokens_seen": 2014314496 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.4859057068824768, + "objective/train/docs_used": 1138919, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.946512460708618, + "objective/train/original_loss": 2.946512222290039, + "objective/train/theoretical_loss": 3.4282242838797234, + "objective/train/tokens_used": 2035692000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23969919979572296, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498089790344238, + "objective/train/weighted_lm_loss": 3.09318208694458, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.9610596299171448, + "theoretical_loss": 3.4282242838797234, + "tokens_seen": 2015232000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014150586454786228, + "loss": 3.1535, + "theoretical_loss": 3.428205721292041, + "tokens_seen": 2015363072 + }, + { + "epoch": 0.72, + "learning_rate": 0.000141316685584563, + "loss": 3.1432, + "theoretical_loss": 3.4280572761988406, + "tokens_seen": 2016411648 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014112750662126372, + "loss": 3.1212, + "theoretical_loss": 3.4279089298817764, + "tokens_seen": 2017460224 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.4828082323074341, + "objective/train/docs_used": 1140999, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7979843616485596, + "objective/train/original_loss": 2.7979846000671387, + "objective/train/theoretical_loss": 3.427760682223834, + "objective/train/tokens_used": 2038968800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23849663138389587, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494930744171143, + "objective/train/weighted_lm_loss": 2.936995029449463, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.954368531703949, + "theoretical_loss": 3.427760682223834, + "tokens_seen": 2018508800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014093832765796444, + "loss": 3.119, + "theoretical_loss": 3.427760682223834, + "tokens_seen": 2018508800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014074914869466516, + "loss": 3.1216, + "theoretical_loss": 3.4276125331081997, + "tokens_seen": 2019557376 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014055996973136588, + "loss": 3.0604, + "theoretical_loss": 3.427464482418257, + "tokens_seen": 2020605952 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014037079076806657, + "loss": 3.0767, + "theoretical_loss": 3.4273165300375887, + "tokens_seen": 2021654528 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.4603384733200073, + "objective/train/docs_used": 1143312, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7195169925689697, + "objective/train/original_loss": 2.719517230987549, + "objective/train/theoretical_loss": 3.427298042896586, + "objective/train/tokens_used": 2042245600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23422212898731232, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0472242832183838, + "objective/train/weighted_lm_loss": 2.849896192550659, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9517197012901306, + "theoretical_loss": 3.427298042896586, + "tokens_seen": 2021785600 + }, + { + "epoch": 0.72, + "learning_rate": 0.00014018161180476732, + "loss": 3.0877, + "theoretical_loss": 3.427168675849976, + "tokens_seen": 2022703104 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013999243284146804, + "loss": 3.1204, + "theoretical_loss": 3.427020919739397, + "tokens_seen": 2023751680 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013980325387816876, + "loss": 3.0585, + "theoretical_loss": 3.4268732615900266, + "tokens_seen": 2024800256 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.4914935827255249, + "objective/train/docs_used": 1144755, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.170673131942749, + "objective/train/original_loss": 3.170673370361328, + "objective/train/theoretical_loss": 3.4268363623455684, + "objective/train/tokens_used": 2045522400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24298258125782013, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503846406936646, + "objective/train/weighted_lm_loss": 3.3304572105407715, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.981985330581665, + "theoretical_loss": 3.4268363623455684, + "tokens_seen": 2025062400 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013961407491486946, + "loss": 3.0733, + "theoretical_loss": 3.4267257012862373, + "tokens_seen": 2025848832 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013942489595157018, + "loss": 3.074, + "theoretical_loss": 3.4265782387125974, + "tokens_seen": 2026897408 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001392357169882709, + "loss": 3.1012, + "theoretical_loss": 3.426430873753871, + "tokens_seen": 2027945984 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.49117955565452576, + "objective/train/docs_used": 1146855, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9218034744262695, + "objective/train/original_loss": 2.9218032360076904, + "objective/train/theoretical_loss": 3.426375637037207, + "objective/train/tokens_used": 2048799200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2444514036178589, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503606796264648, + "objective/train/weighted_lm_loss": 3.0689315795898438, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9535993337631226, + "theoretical_loss": 3.426375637037207, + "tokens_seen": 2028339200 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013904653802497164, + "loss": 3.104, + "theoretical_loss": 3.4262836062950175, + "tokens_seen": 2028994560 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013885735906167234, + "loss": 3.0803, + "theoretical_loss": 3.4261364362211912, + "tokens_seen": 2030043136 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013866818009837306, + "loss": 3.0555, + "theoretical_loss": 3.425989363417741, + "tokens_seen": 2031091712 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.482932448387146, + "objective/train/docs_used": 1149039, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0094153881073, + "objective/train/original_loss": 3.0094151496887207, + "objective/train/theoretical_loss": 3.425915863456632, + "objective/train/tokens_used": 2052076000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23867341876029968, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049506425857544, + "objective/train/weighted_lm_loss": 3.15950608253479, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9558206796646118, + "theoretical_loss": 3.425915863456632, + "tokens_seen": 2031616000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013847900113507378, + "loss": 3.0104, + "theoretical_loss": 3.42584238777021, + "tokens_seen": 2032140288 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001382898221717745, + "loss": 3.0758, + "theoretical_loss": 3.4256955091643353, + "tokens_seen": 2033188864 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013810064320847522, + "loss": 3.0, + "theoretical_loss": 3.4255487274860457, + "tokens_seen": 2034237440 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.48947277665138245, + "objective/train/docs_used": 1151240, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.842642068862915, + "objective/train/original_loss": 2.842642307281494, + "objective/train/theoretical_loss": 3.425457038107547, + "objective/train/tokens_used": 2055352800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24280238151550293, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.05018150806427, + "objective/train/weighted_lm_loss": 2.9852519035339355, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9556968212127686, + "theoretical_loss": 3.425457038107547, + "tokens_seen": 2034892800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013791146424517594, + "loss": 2.9684, + "theoretical_loss": 3.425402042621465, + "tokens_seen": 2035286016 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013772228528187666, + "loss": 3.0045, + "theoretical_loss": 3.4252554544569076, + "tokens_seen": 2036334592 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013753310631857738, + "loss": 2.9956, + "theoretical_loss": 3.4251089628788804, + "tokens_seen": 2037383168 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.4755612313747406, + "objective/train/docs_used": 1153257, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9520466327667236, + "objective/train/original_loss": 2.9520463943481445, + "objective/train/theoretical_loss": 3.4249991575121053, + "objective/train/tokens_used": 2058629600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23398137092590332, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0487455129623413, + "objective/train/weighted_lm_loss": 3.0983822345733643, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9720361828804016, + "theoretical_loss": 3.4249991575121053, + "tokens_seen": 2038169600 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001373439273552781, + "loss": 3.0354, + "theoretical_loss": 3.4249625677740823, + "tokens_seen": 2038431744 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001371547483919788, + "loss": 3.0449, + "theoretical_loss": 3.424816269029402, + "tokens_seen": 2039480320 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013696556942867952, + "loss": 3.0413, + "theoretical_loss": 3.424670066531922, + "tokens_seen": 2040528896 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.48594948649406433, + "objective/train/docs_used": 1155326, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8335914611816406, + "objective/train/original_loss": 2.833591938018799, + "objective/train/theoretical_loss": 3.4245422182107816, + "objective/train/tokens_used": 2061906400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2407456785440445, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049818515777588, + "objective/train/weighted_lm_loss": 2.974628448486328, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9524859189987183, + "theoretical_loss": 3.4245422182107816, + "tokens_seen": 2041446400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013677639046538026, + "loss": 3.065, + "theoretical_loss": 3.4245239601689104, + "tokens_seen": 2041577472 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013658721150208098, + "loss": 3.0519, + "theoretical_loss": 3.4243779498278286, + "tokens_seen": 2042626048 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013639803253878168, + "loss": 3.1046, + "theoretical_loss": 3.4242320353963267, + "tokens_seen": 2043674624 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.489499568939209, + "objective/train/docs_used": 1157326, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9112443923950195, + "objective/train/original_loss": 2.9112446308135986, + "objective/train/theoretical_loss": 3.4240862167622437, + "objective/train/tokens_used": 2065183200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24297621846199036, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501850843429565, + "objective/train/weighted_lm_loss": 3.057767391204834, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9573585987091064, + "theoretical_loss": 3.4240862167622437, + "tokens_seen": 2044723200 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001362088535754824, + "loss": 3.0655, + "theoretical_loss": 3.4240862167622437, + "tokens_seen": 2044723200 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013601967461218312, + "loss": 3.039, + "theoretical_loss": 3.423940493813606, + "tokens_seen": 2045771776 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013583049564888384, + "loss": 3.0649, + "theoretical_loss": 3.42379486643863, + "tokens_seen": 2046820352 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001356413166855846, + "loss": 3.0511, + "theoretical_loss": 3.4236493345257193, + "tokens_seen": 2047868928 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.4733245372772217, + "objective/train/docs_used": 1159281, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9886927604675293, + "objective/train/original_loss": 2.9886927604675293, + "objective/train/theoretical_loss": 3.4236311497432315, + "objective/train/tokens_used": 2068460000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23410466313362122, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485223531723022, + "objective/train/weighted_lm_loss": 3.134115219116211, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9530761241912842, + "theoretical_loss": 3.4236311497432315, + "tokens_seen": 2048000000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013545213772228528, + "loss": 3.0334, + "theoretical_loss": 3.4235038979634647, + "tokens_seen": 2048917504 + }, + { + "epoch": 0.73, + "learning_rate": 0.000135262958758986, + "loss": 3.0765, + "theoretical_loss": 3.4233585566406433, + "tokens_seen": 2049966080 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013507377979568672, + "loss": 3.0136, + "theoretical_loss": 3.4232133104462195, + "tokens_seen": 2051014656 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.4777950942516327, + "objective/train/docs_used": 1161569, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8775670528411865, + "objective/train/original_loss": 2.8775668144226074, + "objective/train/theoretical_loss": 3.4231770137484316, + "objective/train/tokens_used": 2071736800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23451338708400726, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489715337753296, + "objective/train/weighted_lm_loss": 3.0206103324890137, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9626436829566956, + "theoretical_loss": 3.4231770137484316, + "tokens_seen": 2051276800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013488460083238744, + "loss": 3.0315, + "theoretical_loss": 3.423068159269344, + "tokens_seen": 2052063232 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013469542186908814, + "loss": 3.0256, + "theoretical_loss": 3.422923102999353, + "tokens_seen": 2053111808 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013450624290578888, + "loss": 3.0888, + "theoretical_loss": 3.4227781415257676, + "tokens_seen": 2054160384 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.48282214999198914, + "objective/train/docs_used": 1163144, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7914364337921143, + "objective/train/original_loss": 2.791436195373535, + "objective/train/theoretical_loss": 3.422723805390355, + "objective/train/tokens_used": 2075013600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2376357913017273, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049489974975586, + "objective/train/weighted_lm_loss": 2.928440809249878, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9537105560302734, + "theoretical_loss": 3.422723805390355, + "tokens_seen": 2054553600 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001343170639424896, + "loss": 2.9756, + "theoretical_loss": 3.4226332747382946, + "tokens_seen": 2055208960 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013412788497919032, + "loss": 3.0469, + "theoretical_loss": 3.422488502526824, + "tokens_seen": 2056257536 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013393870601589104, + "loss": 3.0435, + "theoretical_loss": 3.422343824781432, + "tokens_seen": 2057306112 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.48526403307914734, + "objective/train/docs_used": 1165022, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.139117956161499, + "objective/train/original_loss": 3.139117956161499, + "objective/train/theoretical_loss": 3.422271521299214, + "objective/train/tokens_used": 2078290400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2393149435520172, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497426986694336, + "objective/train/weighted_lm_loss": 3.2956371307373047, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9516664743423462, + "theoretical_loss": 3.422271521299214, + "tokens_seen": 2057830400 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013374952705259174, + "loss": 3.0356, + "theoretical_loss": 3.4221992413923767, + "tokens_seen": 2058354688 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013356034808929246, + "loss": 2.9939, + "theoretical_loss": 3.4220547522500997, + "tokens_seen": 2059403264 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001333711691259932, + "loss": 3.0836, + "theoretical_loss": 3.4219103572452267, + "tokens_seen": 2060451840 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.48648935556411743, + "objective/train/docs_used": 1166735, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.215547800064087, + "objective/train/original_loss": 3.215548038482666, + "objective/train/theoretical_loss": 3.421820158122806, + "objective/train/tokens_used": 2081567200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2412308305501938, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498751401901245, + "objective/train/weighted_lm_loss": 3.376168966293335, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9565590023994446, + "theoretical_loss": 3.421820158122806, + "tokens_seen": 2061107200 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013318199016269393, + "loss": 3.0275, + "theoretical_loss": 3.421766056268565, + "tokens_seen": 2061500416 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013299281119939462, + "loss": 3.0679, + "theoretical_loss": 3.4216218492111032, + "tokens_seen": 2062548992 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013280363223609534, + "loss": 3.0523, + "theoretical_loss": 3.4214777359640136, + "tokens_seen": 2063597568 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.4906824827194214, + "objective/train/docs_used": 1168682, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.187013864517212, + "objective/train/original_loss": 3.187013626098633, + "objective/train/theoretical_loss": 3.4213697125263884, + "objective/train/tokens_used": 2084844000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2428896725177765, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503029823303223, + "objective/train/weighted_lm_loss": 3.3470242023468018, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9735086560249329, + "theoretical_loss": 3.4213697125263884, + "tokens_seen": 2064384000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013261445327279606, + "loss": 3.0907, + "theoretical_loss": 3.4213337164186486, + "tokens_seen": 2064646144 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013242527430949678, + "loss": 3.0811, + "theoretical_loss": 3.4211897904665416, + "tokens_seen": 2065694720 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001322360953461975, + "loss": 3.013, + "theoretical_loss": 3.4210459579994064, + "tokens_seen": 2066743296 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.48356500267982483, + "objective/train/docs_used": 1170870, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.949878454208374, + "objective/train/original_loss": 2.949878692626953, + "objective/train/theoretical_loss": 3.4209201811925642, + "objective/train/tokens_used": 2088120800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24099013209342957, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495814085006714, + "objective/train/weighted_lm_loss": 3.0962560176849365, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9516450762748718, + "theoretical_loss": 3.4209201811925642, + "tokens_seen": 2067660800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013204691638289822, + "loss": 3.0967, + "theoretical_loss": 3.4209022189091374, + "tokens_seen": 2067791872 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013185773741959894, + "loss": 2.9705, + "theoretical_loss": 3.4207585730878085, + "tokens_seen": 2068840448 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013166855845629966, + "loss": 3.0573, + "theoretical_loss": 3.4206150204276726, + "tokens_seen": 2069889024 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.46928921341896057, + "objective/train/docs_used": 1172808, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.938577651977539, + "objective/train/original_loss": 2.938577651977539, + "objective/train/theoretical_loss": 3.420471560821163, + "objective/train/tokens_used": 2091397600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23262350261211395, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481112003326416, + "objective/train/weighted_lm_loss": 3.0799367427825928, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9526295065879822, + "theoretical_loss": 3.420471560821163, + "tokens_seen": 2070937600 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013147937949300038, + "loss": 3.0212, + "theoretical_loss": 3.420471560821163, + "tokens_seen": 2070937600 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013129020052970108, + "loss": 3.0938, + "theoretical_loss": 3.4203281941608896, + "tokens_seen": 2071986176 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013110102156640183, + "loss": 3.1445, + "theoretical_loss": 3.4201849203396417, + "tokens_seen": 2073034752 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013091184260310255, + "loss": 3.0502, + "theoretical_loss": 3.4200417392503866, + "tokens_seen": 2074083328 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.4865155816078186, + "objective/train/docs_used": 1174600, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.854151964187622, + "objective/train/original_loss": 2.854151725769043, + "objective/train/theoretical_loss": 3.4200238481291243, + "objective/train/tokens_used": 2094674400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24221700429916382, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498827695846558, + "objective/train/weighted_lm_loss": 2.9975836277008057, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9545468688011169, + "theoretical_loss": 3.4200238481291243, + "tokens_seen": 2074214400 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013072266363980327, + "loss": 3.1057, + "theoretical_loss": 3.4198986507862683, + "tokens_seen": 2075131904 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013053348467650396, + "loss": 3.0969, + "theoretical_loss": 3.419755654840608, + "tokens_seen": 2076180480 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013034430571320468, + "loss": 3.1735, + "theoretical_loss": 3.419612751306904, + "tokens_seen": 2077229056 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.48453977704048157, + "objective/train/docs_used": 1176526, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9523186683654785, + "objective/train/original_loss": 2.9523184299468994, + "objective/train/theoretical_loss": 3.419577039850382, + "objective/train/tokens_used": 2097951200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23808008432388306, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496641397476196, + "objective/train/weighted_lm_loss": 3.0992777347564697, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9584941267967224, + "theoretical_loss": 3.419577039850382, + "tokens_seen": 2077491200 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001301551267499054, + "loss": 3.1769, + "theoretical_loss": 3.41946994007883, + "tokens_seen": 2078277632 + }, + { + "epoch": 0.74, + "learning_rate": 0.00012996594778660615, + "loss": 3.1278, + "theoretical_loss": 3.4193272210502372, + "tokens_seen": 2079326208 + }, + { + "epoch": 0.74, + "learning_rate": 0.00012977676882330687, + "loss": 3.1304, + "theoretical_loss": 3.4191845941151504, + "tokens_seen": 2080374784 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.48189568519592285, + "objective/train/docs_used": 1178616, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7977302074432373, + "objective/train/original_loss": 2.7977304458618164, + "objective/train/theoretical_loss": 3.4191311327357505, + "objective/train/tokens_used": 2101228000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23650392889976501, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493916273117065, + "objective/train/weighted_lm_loss": 2.936192512512207, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9538434743881226, + "theoretical_loss": 3.4191311327357505, + "tokens_seen": 2080768000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00012958758986000756, + "loss": 3.0905, + "theoretical_loss": 3.4190420591677713, + "tokens_seen": 2081423360 + }, + { + "epoch": 0.74, + "learning_rate": 0.00012939841089670828, + "loss": 3.0506, + "theoretical_loss": 3.4188996161024745, + "tokens_seen": 2082471936 + }, + { + "epoch": 0.74, + "learning_rate": 0.000129209231933409, + "loss": 3.047, + "theoretical_loss": 3.4187572648138107, + "tokens_seen": 2083520512 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.4861668646335602, + "objective/train/docs_used": 1180033, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.336259603500366, + "objective/train/original_loss": 3.3362598419189453, + "objective/train/theoretical_loss": 3.4186861235528108, + "objective/train/tokens_used": 2104504800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24133270978927612, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498433113098145, + "objective/train/weighted_lm_loss": 3.501760244369507, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9516600370407104, + "theoretical_loss": 3.4186861235528108, + "tokens_seen": 2084044800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00012902005297010972, + "loss": 3.1077, + "theoretical_loss": 3.4186150051965036, + "tokens_seen": 2084569088 + }, + { + "epoch": 0.74, + "learning_rate": 0.00012883087400681045, + "loss": 3.1232, + "theoretical_loss": 3.418472837145451, + "tokens_seen": 2085617664 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012864169504351117, + "loss": 3.0959, + "theoretical_loss": 3.4183307605557247, + "tokens_seen": 2086666240 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.4884311556816101, + "objective/train/docs_used": 1182648, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9421818256378174, + "objective/train/original_loss": 2.9421820640563965, + "objective/train/theoretical_loss": 3.4182420090857955, + "objective/train/tokens_used": 2107781600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24259597063064575, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500762462615967, + "objective/train/weighted_lm_loss": 3.0886693000793457, + "objective/train/weights_max": 1.0512189865112305, + "objective/train/weights_min": 0.9524582028388977, + "theoretical_loss": 3.4182420090857955, + "tokens_seen": 2087321600 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001284525160802119, + "loss": 3.036, + "theoretical_loss": 3.418188775322567, + "tokens_seen": 2087714816 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001282633371169126, + "loss": 3.0956, + "theoretical_loss": 3.418046881341395, + "tokens_seen": 2088763392 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001280741581536133, + "loss": 3.1281, + "theoretical_loss": 3.417905078507798, + "tokens_seen": 2089811968 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.48270413279533386, + "objective/train/docs_used": 1184346, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0532760620117188, + "objective/train/original_loss": 3.0532760620117188, + "objective/train/theoretical_loss": 3.4177987861354815, + "objective/train/tokens_used": 2111058400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24031376838684082, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494917631149292, + "objective/train/weighted_lm_loss": 3.203695297241211, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.952264666557312, + "theoretical_loss": 3.4177987861354815, + "tokens_seen": 2090598400 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012788497919031402, + "loss": 3.0645, + "theoretical_loss": 3.4177633667175344, + "tokens_seen": 2090860544 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012769580022701477, + "loss": 3.1287, + "theoretical_loss": 3.417621745866537, + "tokens_seen": 2091909120 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001275066212637155, + "loss": 3.1295, + "theoretical_loss": 3.4174802158509086, + "tokens_seen": 2092957696 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.487575501203537, + "objective/train/docs_used": 1186411, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.890275001525879, + "objective/train/original_loss": 2.890275001525879, + "objective/train/theoretical_loss": 3.4173564515190753, + "objective/train/tokens_used": 2114335200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2412995547056198, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049984097480774, + "objective/train/weighted_lm_loss": 3.034238815307617, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9572398662567139, + "theoretical_loss": 3.4173564515190753, + "tokens_seen": 2093875200 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001273174423004162, + "loss": 3.1029, + "theoretical_loss": 3.4173387765669228, + "tokens_seen": 2094006272 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001271282633371169, + "loss": 3.0415, + "theoretical_loss": 3.4171974279110224, + "tokens_seen": 2095054848 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012693908437381762, + "loss": 3.1255, + "theoretical_loss": 3.417056169779822, + "tokens_seen": 2096103424 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.4885167181491852, + "objective/train/docs_used": 1188334, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.939175844192505, + "objective/train/original_loss": 2.939175605773926, + "objective/train/theoretical_loss": 3.4169150020701045, + "objective/train/tokens_used": 2117612000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24050042033195496, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500741004943848, + "objective/train/weighted_lm_loss": 3.0871052742004395, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9530365467071533, + "theoretical_loss": 3.4169150020701045, + "tokens_seen": 2097152000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012674990541051834, + "loss": 3.0906, + "theoretical_loss": 3.4169150020701045, + "tokens_seen": 2097152000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001265607264472191, + "loss": 3.0661, + "theoretical_loss": 3.4167739246788225, + "tokens_seen": 2098200576 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012637154748391979, + "loss": 3.0931, + "theoretical_loss": 3.4166329375030973, + "tokens_seen": 2099249152 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001261823685206205, + "loss": 3.0467, + "theoretical_loss": 3.416492040440219, + "tokens_seen": 2100297728 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.4632844030857086, + "objective/train/docs_used": 1190051, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.067115545272827, + "objective/train/original_loss": 3.0671157836914062, + "objective/train/theoretical_loss": 3.4164744346383094, + "objective/train/tokens_used": 2120888800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23205097019672394, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.047507643699646, + "objective/train/weighted_lm_loss": 3.2131385803222656, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.951475977897644, + "theoretical_loss": 3.4164744346383094, + "tokens_seen": 2100428800 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012599318955732123, + "loss": 3.0882, + "theoretical_loss": 3.416351233387645, + "tokens_seen": 2101346304 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012580401059402195, + "loss": 3.1133, + "theoretical_loss": 3.4162105162430008, + "tokens_seen": 2102394880 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012561483163072264, + "loss": 3.1099, + "theoretical_loss": 3.4160698889040804, + "tokens_seen": 2103443456 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.47364890575408936, + "objective/train/docs_used": 1192176, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6278741359710693, + "objective/train/original_loss": 2.6278746128082275, + "objective/train/theoretical_loss": 3.416034746089533, + "objective/train/tokens_used": 2124165600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23315325379371643, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485498905181885, + "objective/train/weighted_lm_loss": 2.7560436725616455, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9518656730651855, + "theoretical_loss": 3.416034746089533, + "tokens_seen": 2103705600 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001254256526674234, + "loss": 3.1199, + "theoretical_loss": 3.4159293512688436, + "tokens_seen": 2104492032 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001252364737041241, + "loss": 3.1159, + "theoretical_loss": 3.415788903235418, + "tokens_seen": 2105540608 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012504729474082483, + "loss": 3.0587, + "theoretical_loss": 3.415648544702096, + "tokens_seen": 2106589184 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.47495296597480774, + "objective/train/docs_used": 1193800, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.238267660140991, + "objective/train/original_loss": 3.238267421722412, + "objective/train/theoretical_loss": 3.4155959333056156, + "objective/train/tokens_used": 2127442400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23427124321460724, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048686146736145, + "objective/train/weighted_lm_loss": 3.392909288406372, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.952079713344574, + "theoretical_loss": 3.4155959333056156, + "tokens_seen": 2106982400 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012485811577752555, + "loss": 3.0348, + "theoretical_loss": 3.4155082755673374, + "tokens_seen": 2107637760 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012466893681422627, + "loss": 3.0607, + "theoretical_loss": 3.415368095729767, + "tokens_seen": 2108686336 + }, + { + "epoch": 0.75, + "learning_rate": 0.000124479757850927, + "loss": 3.1031, + "theoretical_loss": 3.415228005088175, + "tokens_seen": 2109734912 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.46726658940315247, + "objective/train/docs_used": 1195692, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0192675590515137, + "objective/train/original_loss": 3.0192675590515137, + "objective/train/theoretical_loss": 3.4151579931842884, + "objective/train/tokens_used": 2130719200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2279476523399353, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0478851795196533, + "objective/train/weighted_lm_loss": 3.1648991107940674, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9538955688476562, + "theoretical_loss": 3.4151579931842884, + "tokens_seen": 2110259200 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012429057888762768, + "loss": 3.1491, + "theoretical_loss": 3.4150880035415168, + "tokens_seen": 2110783488 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012410139992432843, + "loss": 3.0926, + "theoretical_loss": 3.4149480909889123, + "tokens_seen": 2111832064 + }, + { + "epoch": 0.75, + "learning_rate": 0.00012391222096102913, + "loss": 3.1851, + "theoretical_loss": 3.4148082673296445, + "tokens_seen": 2112880640 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.4854118824005127, + "objective/train/docs_used": 1197624, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.756822109222412, + "objective/train/original_loss": 2.756822347640991, + "objective/train/theoretical_loss": 3.4147209226390647, + "objective/train/tokens_used": 2133996000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24401891231536865, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497814416885376, + "objective/train/weighted_lm_loss": 2.893664836883545, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9514028429985046, + "theoretical_loss": 3.4147209226390647, + "tokens_seen": 2113536000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012372304199772985, + "loss": 3.1322, + "theoretical_loss": 3.4146685324631627, + "tokens_seen": 2113929216 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012353386303443057, + "loss": 3.0965, + "theoretical_loss": 3.4145288862890775, + "tokens_seen": 2114977792 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001233446840711313, + "loss": 3.1856, + "theoretical_loss": 3.4143893287071636, + "tokens_seen": 2116026368 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.47289353609085083, + "objective/train/docs_used": 1198832, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1778388023376465, + "objective/train/original_loss": 3.1778388023376465, + "objective/train/theoretical_loss": 3.4142847185991414, + "objective/train/tokens_used": 2137272800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2338077276945114, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484776496887207, + "objective/train/weighted_lm_loss": 3.3315629959106445, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.952247142791748, + "theoretical_loss": 3.4142847185991414, + "tokens_seen": 2116812800 + }, + { + "epoch": 0.76, + "learning_rate": 0.000123155505107832, + "loss": 3.1357, + "theoretical_loss": 3.4142498596173594, + "tokens_seen": 2117074944 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012296632614453273, + "loss": 3.0476, + "theoretical_loss": 3.4141104789197634, + "tokens_seen": 2118123520 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012277714718123345, + "loss": 3.0772, + "theoretical_loss": 3.413971186514639, + "tokens_seen": 2119172096 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.48130467534065247, + "objective/train/docs_used": 1201065, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8641321659088135, + "objective/train/original_loss": 2.8641324043273926, + "objective/train/theoretical_loss": 3.4138493780092887, + "objective/train/tokens_used": 2140549600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2385849952697754, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493431091308594, + "objective/train/weighted_lm_loss": 3.0060529708862305, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9514316320419312, + "theoretical_loss": 3.4138493780092887, + "tokens_seen": 2120089600 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012258796821793417, + "loss": 3.1735, + "theoretical_loss": 3.4138319823024093, + "tokens_seen": 2120220672 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001223987892546349, + "loss": 3.0906, + "theoretical_loss": 3.4136928661836605, + "tokens_seen": 2121269248 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001222096102913356, + "loss": 3.1384, + "theoretical_loss": 3.413553838059139, + "tokens_seen": 2122317824 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.4823879301548004, + "objective/train/docs_used": 1202332, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.959242582321167, + "objective/train/original_loss": 2.959242582321167, + "objective/train/theoretical_loss": 3.4134148978297523, + "objective/train/tokens_used": 2143826400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2378014326095581, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494475364685059, + "objective/train/weighted_lm_loss": 3.1045055389404297, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9571212530136108, + "theoretical_loss": 3.4134148978297523, + "tokens_seen": 2123366400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012202043132803632, + "loss": 3.0108, + "theoretical_loss": 3.4134148978297523, + "tokens_seen": 2123366400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012183125236473705, + "loss": 3.0749, + "theoretical_loss": 3.413276045396567, + "tokens_seen": 2124414976 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012164207340143776, + "loss": 3.1091, + "theoretical_loss": 3.413137280660813, + "tokens_seen": 2125463552 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012145289443813848, + "loss": 3.0921, + "theoretical_loss": 3.412998603523877, + "tokens_seen": 2126512128 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.48719412088394165, + "objective/train/docs_used": 1204214, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.252816915512085, + "objective/train/original_loss": 3.252816915512085, + "objective/train/theoretical_loss": 3.412981275036147, + "objective/train/tokens_used": 2147103200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2436467409133911, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499577522277832, + "objective/train/weighted_lm_loss": 3.4153482913970947, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9514138698577881, + "theoretical_loss": 3.412981275036147, + "tokens_seen": 2126643200 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001212637154748392, + "loss": 3.0569, + "theoretical_loss": 3.4128600138873066, + "tokens_seen": 2127560704 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012107453651153992, + "loss": 3.0342, + "theoretical_loss": 3.4127215116528076, + "tokens_seen": 2128609280 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012088535754824064, + "loss": 3.0179, + "theoretical_loss": 3.412583096722245, + "tokens_seen": 2129657856 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.48841291666030884, + "objective/train/docs_used": 1206282, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7933783531188965, + "objective/train/original_loss": 2.7933781147003174, + "objective/train/theoretical_loss": 3.41254850661936, + "objective/train/tokens_used": 2150380000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2410411387681961, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500664710998535, + "objective/train/weighted_lm_loss": 2.9343910217285156, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9590831995010376, + "theoretical_loss": 3.41254850661936, + "tokens_seen": 2129920000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012069617858494136, + "loss": 3.0067, + "theoretical_loss": 3.412444768997643, + "tokens_seen": 2130706432 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012050699962164208, + "loss": 3.0622, + "theoretical_loss": 3.4123065283811833, + "tokens_seen": 2131755008 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012031782065834279, + "loss": 3.0901, + "theoretical_loss": 3.412168374775204, + "tokens_seen": 2132803584 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.48633265495300293, + "objective/train/docs_used": 1208010, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.088775873184204, + "objective/train/original_loss": 3.088775396347046, + "objective/train/theoretical_loss": 3.412116589585446, + "objective/train/tokens_used": 2153656800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23890967667102814, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498476028442383, + "objective/train/weighted_lm_loss": 3.2435507774353027, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9601168036460876, + "theoretical_loss": 3.412116589585446, + "tokens_seen": 2133196800 + }, + { + "epoch": 0.76, + "learning_rate": 0.00012012864169504352, + "loss": 3.0086, + "theoretical_loss": 3.412030308082203, + "tokens_seen": 2133852160 + }, + { + "epoch": 0.76, + "learning_rate": 0.00011993946273174423, + "loss": 3.1356, + "theoretical_loss": 3.411892328204834, + "tokens_seen": 2134900736 + }, + { + "epoch": 0.76, + "learning_rate": 0.00011975028376844495, + "loss": 3.0847, + "theoretical_loss": 3.411754435045907, + "tokens_seen": 2135949312 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.48867544531822205, + "objective/train/docs_used": 1209970, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9987714290618896, + "objective/train/original_loss": 2.9987711906433105, + "objective/train/theoretical_loss": 3.4116855209555306, + "objective/train/tokens_used": 2156933600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24339966475963593, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501048564910889, + "objective/train/weighted_lm_loss": 3.1480982303619385, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9592180848121643, + "theoretical_loss": 3.4116855209555306, + "tokens_seen": 2136473600 + }, + { + "epoch": 0.76, + "learning_rate": 0.00011956110480514567, + "loss": 3.0679, + "theoretical_loss": 3.4116166285083898, + "tokens_seen": 2136997888 + }, + { + "epoch": 0.76, + "learning_rate": 0.00011937192584184639, + "loss": 3.0748, + "theoretical_loss": 3.411478908495406, + "tokens_seen": 2138046464 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001191827468785471, + "loss": 3.0104, + "theoretical_loss": 3.411341274910234, + "tokens_seen": 2139095040 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.4596433937549591, + "objective/train/docs_used": 1212088, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6375980377197266, + "objective/train/original_loss": 2.6375980377197266, + "objective/train/theoretical_loss": 3.4112552977657105, + "objective/train/tokens_used": 2160210400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23229673504829407, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0471444129943848, + "objective/train/weighted_lm_loss": 2.7628188133239746, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9519481658935547, + "theoretical_loss": 3.4112552977657105, + "tokens_seen": 2139750400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00011899356791524783, + "loss": 3.0396, + "theoretical_loss": 3.411203727656308, + "tokens_seen": 2140143616 + }, + { + "epoch": 0.76, + "learning_rate": 0.00011880438895194855, + "loss": 3.0234, + "theoretical_loss": 3.411066266637219, + "tokens_seen": 2141192192 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011861520998864926, + "loss": 3.0856, + "theoretical_loss": 3.41092889175671, + "tokens_seen": 2142240768 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.4793272912502289, + "objective/train/docs_used": 1213995, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8040668964385986, + "objective/train/original_loss": 2.8040671348571777, + "objective/train/theoretical_loss": 3.410825917066955, + "objective/train/tokens_used": 2163487200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23690885305404663, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049136996269226, + "objective/train/weighted_lm_loss": 2.9428248405456543, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9542986154556274, + "theoretical_loss": 3.410825917066955, + "tokens_seen": 2143027200 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011842603102535, + "loss": 3.0692, + "theoretical_loss": 3.4107916029186804, + "tokens_seen": 2143289344 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001182368520620507, + "loss": 3.067, + "theoretical_loss": 3.410654400027184, + "tokens_seen": 2144337920 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011804767309875142, + "loss": 3.0471, + "theoretical_loss": 3.4105172829864268, + "tokens_seen": 2145386496 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.4841189980506897, + "objective/train/docs_used": 1215500, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.244422197341919, + "objective/train/original_loss": 3.2444229125976562, + "objective/train/theoretical_loss": 3.4103973759250095, + "objective/train/tokens_used": 2166764000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2405548393726349, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496346950531006, + "objective/train/weighted_lm_loss": 3.404898166656494, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9524633288383484, + "theoretical_loss": 3.4103973759250095, + "tokens_seen": 2146304000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011785849413545214, + "loss": 3.0911, + "theoretical_loss": 3.4103802517007695, + "tokens_seen": 2146435072 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011766931517215286, + "loss": 3.0649, + "theoretical_loss": 3.410243306074726, + "tokens_seen": 2147483648 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011748013620885357, + "loss": 3.1124, + "theoretical_loss": 3.4101064460129624, + "tokens_seen": 2148532224 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.4838142693042755, + "objective/train/docs_used": 1217649, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9245572090148926, + "objective/train/original_loss": 2.9245574474334717, + "objective/train/theoretical_loss": 3.4099696714202983, + "objective/train/tokens_used": 2170040800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24016062915325165, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496021509170532, + "objective/train/weighted_lm_loss": 3.0698323249816895, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9524503946304321, + "theoretical_loss": 3.4099696714202983, + "tokens_seen": 2149580800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001172909572455543, + "loss": 3.1153, + "theoretical_loss": 3.4099696714202983, + "tokens_seen": 2149580800 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011710177828225501, + "loss": 3.0984, + "theoretical_loss": 3.4098329822017055, + "tokens_seen": 2150629376 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011691259931895573, + "loss": 3.1195, + "theoretical_loss": 3.4096963782623058, + "tokens_seen": 2151677952 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011672342035565647, + "loss": 3.108, + "theoretical_loss": 3.4095598595073753, + "tokens_seen": 2152726528 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.47976046800613403, + "objective/train/docs_used": 1219475, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9882757663726807, + "objective/train/original_loss": 2.9882755279541016, + "objective/train/theoretical_loss": 3.40954280064783, + "objective/train/tokens_used": 2173317600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23530583083629608, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049172043800354, + "objective/train/weighted_lm_loss": 3.135436773300171, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9584304690361023, + "theoretical_loss": 3.40954280064783, + "tokens_seen": 2152857600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011653424139235717, + "loss": 3.1106, + "theoretical_loss": 3.4094234258423395, + "tokens_seen": 2153775104 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011634506242905789, + "loss": 3.1039, + "theoretical_loss": 3.4092870771727766, + "tokens_seen": 2154823680 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011615588346575861, + "loss": 3.0136, + "theoretical_loss": 3.409150813404413, + "tokens_seen": 2155872256 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.4865594506263733, + "objective/train/docs_used": 1221633, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0492663383483887, + "objective/train/original_loss": 3.0492663383483887, + "objective/train/theoretical_loss": 3.409116760717102, + "objective/train/tokens_used": 2176594400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24001134932041168, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049876093864441, + "objective/train/weighted_lm_loss": 3.2022552490234375, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9568141102790833, + "theoretical_loss": 3.409116760717102, + "tokens_seen": 2156134400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011596670450245933, + "loss": 3.0194, + "theoretical_loss": 3.409014634443128, + "tokens_seen": 2156920832 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011577752553916004, + "loss": 3.0251, + "theoretical_loss": 3.408878540194949, + "tokens_seen": 2157969408 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011558834657586078, + "loss": 3.0482, + "theoretical_loss": 3.4087425305660544, + "tokens_seen": 2159017984 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.48565536737442017, + "objective/train/docs_used": 1223129, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.323091745376587, + "objective/train/original_loss": 2.323091983795166, + "objective/train/theoretical_loss": 3.4086915487520044, + "objective/train/tokens_used": 2179871200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23969988524913788, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497839450836182, + "objective/train/weighted_lm_loss": 2.4400320053100586, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9515894055366516, + "theoretical_loss": 3.4086915487520044, + "tokens_seen": 2159411200 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011539916761256148, + "loss": 3.0768, + "theoretical_loss": 3.4086066054627713, + "tokens_seen": 2160066560 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001152099886492622, + "loss": 3.049, + "theoretical_loss": 3.408470764791576, + "tokens_seen": 2161115136 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011502080968596292, + "loss": 3.0416, + "theoretical_loss": 3.408335008459094, + "tokens_seen": 2162163712 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.488086462020874, + "objective/train/docs_used": 1224854, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0033509731292725, + "objective/train/original_loss": 3.0033512115478516, + "objective/train/theoretical_loss": 3.4082671618907314, + "objective/train/tokens_used": 2183148000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24069108068943024, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050032138824463, + "objective/train/weighted_lm_loss": 3.1541175842285156, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9704567790031433, + "theoretical_loss": 3.4082671618907314, + "tokens_seen": 2162688000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011483163072266364, + "loss": 3.0526, + "theoretical_loss": 3.408199336372099, + "tokens_seen": 2163212288 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011464245175936436, + "loss": 3.1291, + "theoretical_loss": 3.4080637484375127, + "tokens_seen": 2164260864 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011445327279606509, + "loss": 3.0743, + "theoretical_loss": 3.407928244562405, + "tokens_seen": 2165309440 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.4888122081756592, + "objective/train/docs_used": 1226563, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.946284770965576, + "objective/train/original_loss": 2.9462850093841553, + "objective/train/theoretical_loss": 3.407843597285684, + "objective/train/tokens_used": 2186424800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24123455584049225, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501073598861694, + "objective/train/weighted_lm_loss": 3.094599485397339, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9530252814292908, + "theoretical_loss": 3.407843597285684, + "tokens_seen": 2165964800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001142640938327658, + "loss": 3.1164, + "theoretical_loss": 3.4077928246539937, + "tokens_seen": 2166358016 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011407491486946651, + "loss": 3.1127, + "theoretical_loss": 3.407657488619642, + "tokens_seen": 2167406592 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011388573590616725, + "loss": 3.1488, + "theoretical_loss": 3.407522236366863, + "tokens_seen": 2168455168 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.48051032423973083, + "objective/train/docs_used": 1228356, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.281541585922241, + "objective/train/original_loss": 3.2815420627593994, + "objective/train/theoretical_loss": 3.4074208521033804, + "objective/train/tokens_used": 2189701600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24015885591506958, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0492717027664185, + "objective/train/weighted_lm_loss": 3.4423675537109375, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9514672160148621, + "theoretical_loss": 3.4074208521033804, + "tokens_seen": 2169241600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00011369655694286795, + "loss": 3.0957, + "theoretical_loss": 3.407387067803314, + "tokens_seen": 2169503744 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011350737797956867, + "loss": 3.1708, + "theoretical_loss": 3.4072519828367995, + "tokens_seen": 2170552320 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001133181990162694, + "loss": 3.1192, + "theoretical_loss": 3.4071169813752706, + "tokens_seen": 2171600896 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.48870110511779785, + "objective/train/docs_used": 1230300, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.97200083732605, + "objective/train/original_loss": 2.972001075744629, + "objective/train/theoretical_loss": 3.4069989235243634, + "objective/train/tokens_used": 2192978400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24322699010372162, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050106406211853, + "objective/train/weighted_lm_loss": 3.1200037002563477, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.955571711063385, + "theoretical_loss": 3.4069989235243634, + "tokens_seen": 2172518400 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011312902005297012, + "loss": 3.0742, + "theoretical_loss": 3.406982063326823, + "tokens_seen": 2172649472 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011293984108967082, + "loss": 3.102, + "theoretical_loss": 3.4068472285996987, + "tokens_seen": 2173698048 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011275066212637156, + "loss": 3.0766, + "theoretical_loss": 3.4067124771022845, + "tokens_seen": 2174746624 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.47267434000968933, + "objective/train/docs_used": 1232394, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9665775299072266, + "objective/train/original_loss": 2.9665770530700684, + "objective/train/theoretical_loss": 3.4065778087431124, + "objective/train/tokens_used": 2196255200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23377160727977753, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048455834388733, + "objective/train/weighted_lm_loss": 3.112847328186035, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9565809965133667, + "theoretical_loss": 3.4065778087431124, + "tokens_seen": 2175795200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011256148316307228, + "loss": 3.061, + "theoretical_loss": 3.4065778087431124, + "tokens_seen": 2175795200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011237230419977298, + "loss": 3.0844, + "theoretical_loss": 3.406443223430858, + "tokens_seen": 2176843776 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011218312523647372, + "loss": 3.0302, + "theoretical_loss": 3.4063087210743426, + "tokens_seen": 2177892352 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011199394627317443, + "loss": 3.0247, + "theoretical_loss": 3.4061743015825305, + "tokens_seen": 2178940928 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.485377699136734, + "objective/train/docs_used": 1233537, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1749722957611084, + "objective/train/original_loss": 3.1749720573425293, + "objective/train/theoretical_loss": 3.40615750496795, + "objective/train/tokens_used": 2199532000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24226607382297516, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497691631317139, + "objective/train/weighted_lm_loss": 3.332637310028076, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9522126317024231, + "theoretical_loss": 3.40615750496795, + "tokens_seen": 2179072000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011180476730987515, + "loss": 3.1218, + "theoretical_loss": 3.4060399648645294, + "tokens_seen": 2179989504 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011161558834657587, + "loss": 3.0676, + "theoretical_loss": 3.4059057108295914, + "tokens_seen": 2181038080 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011142640938327659, + "loss": 3.0838, + "theoretical_loss": 3.4057715393871097, + "tokens_seen": 2182086656 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.4907640814781189, + "objective/train/docs_used": 1235503, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.107961654663086, + "objective/train/original_loss": 3.107961654663086, + "objective/train/theoretical_loss": 3.405738009420957, + "objective/train/tokens_used": 2202808800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24375276267528534, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503153800964355, + "objective/train/weighted_lm_loss": 3.2649710178375244, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9541966319084167, + "theoretical_loss": 3.405738009420957, + "tokens_seen": 2182348800 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001112372304199773, + "loss": 3.0817, + "theoretical_loss": 3.4056374504466236, + "tokens_seen": 2183135232 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011104805145667803, + "loss": 3.1127, + "theoretical_loss": 3.405503443917811, + "tokens_seen": 2184183808 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011085887249337873, + "loss": 3.1144, + "theoretical_loss": 3.4053695197104945, + "tokens_seen": 2185232384 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.4896021783351898, + "objective/train/docs_used": 1237335, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.074671506881714, + "objective/train/original_loss": 3.0746712684631348, + "objective/train/theoretical_loss": 3.4053193193378806, + "objective/train/tokens_used": 2206085600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24403336644172668, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502007007598877, + "objective/train/weighted_lm_loss": 3.2284257411956787, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9618966579437256, + "theoretical_loss": 3.4053193193378806, + "tokens_seen": 2185625600 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011066969353007946, + "loss": 3.1068, + "theoretical_loss": 3.4052356777346384, + "tokens_seen": 2186280960 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011048051456678018, + "loss": 3.0704, + "theoretical_loss": 3.4051019179003474, + "tokens_seen": 2187329536 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001102913356034809, + "loss": 3.0601, + "theoretical_loss": 3.404968240117869, + "tokens_seen": 2188378112 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.47622618079185486, + "objective/train/docs_used": 1239239, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.253549575805664, + "objective/train/original_loss": 3.253549575805664, + "objective/train/theoretical_loss": 3.4049014319680495, + "objective/train/tokens_used": 2209362400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2409483939409256, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488470792770386, + "objective/train/weighted_lm_loss": 3.4102189540863037, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9515026807785034, + "theoretical_loss": 3.4049014319680495, + "tokens_seen": 2188902400 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011010215664018162, + "loss": 3.0343, + "theoretical_loss": 3.40483464429759, + "tokens_seen": 2189426688 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010991297767688234, + "loss": 3.0409, + "theoretical_loss": 3.4047011303500394, + "tokens_seen": 2190475264 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010972379871358306, + "loss": 3.0841, + "theoretical_loss": 3.404567698185886, + "tokens_seen": 2191523840 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.4836944341659546, + "objective/train/docs_used": 1240983, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.245880365371704, + "objective/train/original_loss": 3.245880603790283, + "objective/train/theoretical_loss": 3.404484344574285, + "objective/train/tokens_used": 2212639200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23999130725860596, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495892763137817, + "objective/train/weighted_lm_loss": 3.4061005115509033, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.951469898223877, + "theoretical_loss": 3.404484344574285, + "tokens_seen": 2192179200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010953461975028377, + "loss": 3.0935, + "theoretical_loss": 3.4044343477159393, + "tokens_seen": 2192572416 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001093454407869845, + "loss": 3.1178, + "theoretical_loss": 3.4043010788511476, + "tokens_seen": 2193620992 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001091562618236852, + "loss": 3.0871, + "theoretical_loss": 3.4041678915025995, + "tokens_seen": 2194669568 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.4901869297027588, + "objective/train/docs_used": 1242635, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.035893201828003, + "objective/train/original_loss": 3.035892963409424, + "objective/train/theoretical_loss": 3.404068054432816, + "objective/train/tokens_used": 2215916000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2428739219903946, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502533912658691, + "objective/train/weighted_lm_loss": 3.1888182163238525, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9534474611282349, + "theoretical_loss": 3.404068054432816, + "tokens_seen": 2195456000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010896708286038593, + "loss": 3.1038, + "theoretical_loss": 3.404034785581523, + "tokens_seen": 2195718144 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010877790389708665, + "loss": 3.1226, + "theoretical_loss": 3.4039017609992848, + "tokens_seen": 2196766720 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010858872493378737, + "loss": 3.1207, + "theoretical_loss": 3.4037688176673906, + "tokens_seen": 2197815296 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.48253607749938965, + "objective/train/docs_used": 1244165, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.643556594848633, + "objective/train/original_loss": 2.6435563564300537, + "objective/train/theoretical_loss": 3.4036525588331927, + "objective/train/tokens_used": 2219192800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24120163917541504, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049479365348816, + "objective/train/weighted_lm_loss": 2.772040843963623, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9570202231407166, + "theoretical_loss": 3.4036525588331927, + "tokens_seen": 2198732800 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010839954597048807, + "loss": 3.0908, + "theoretical_loss": 3.403635955497484, + "tokens_seen": 2198863872 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010821036700718881, + "loss": 3.0805, + "theoretical_loss": 3.4035031744013473, + "tokens_seen": 2199912448 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010802118804388953, + "loss": 3.0845, + "theoretical_loss": 3.4033704742909006, + "tokens_seen": 2200961024 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.47237733006477356, + "objective/train/docs_used": 1245946, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.21795916557312, + "objective/train/original_loss": 3.21795916557312, + "objective/train/theoretical_loss": 3.403237855078202, + "objective/train/tokens_used": 2222469600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23783241212368011, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484466552734375, + "objective/train/weighted_lm_loss": 3.377448797225952, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9808477163314819, + "theoretical_loss": 3.403237855078202, + "tokens_seen": 2202009600 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010783200908059024, + "loss": 3.0344, + "theoretical_loss": 3.403237855078202, + "tokens_seen": 2202009600 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010764283011729097, + "loss": 3.0654, + "theoretical_loss": 3.403105316675445, + "tokens_seen": 2203058176 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010745365115399168, + "loss": 3.1111, + "theoretical_loss": 3.402972858994963, + "tokens_seen": 2204106752 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001072644721906924, + "loss": 3.1596, + "theoretical_loss": 3.402840481949224, + "tokens_seen": 2205155328 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.4866069257259369, + "objective/train/docs_used": 1246233, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1187005043029785, + "objective/train/original_loss": 3.1187007427215576, + "objective/train/theoretical_loss": 3.4028239404837826, + "objective/train/tokens_used": 2225746400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2408287674188614, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498849153518677, + "objective/train/weighted_lm_loss": 3.2743141651153564, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9550109505653381, + "theoretical_loss": 3.4028239404837826, + "tokens_seen": 2205286400 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010707529322739312, + "loss": 3.2249, + "theoretical_loss": 3.402708185450833, + "tokens_seen": 2206203904 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010688611426409384, + "loss": 3.3303, + "theoretical_loss": 3.4025759694125317, + "tokens_seen": 2207252480 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010669693530079455, + "loss": 3.2622, + "theoretical_loss": 3.4024438337471974, + "tokens_seen": 2208301056 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.4907922148704529, + "objective/train/docs_used": 1246233, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8649120330810547, + "objective/train/original_loss": 2.8649120330810547, + "objective/train/theoretical_loss": 3.4024108123789434, + "objective/train/tokens_used": 2229023200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24190065264701843, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503088235855103, + "objective/train/weighted_lm_loss": 3.0094716548919678, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9557651281356812, + "theoretical_loss": 3.4024108123789434, + "tokens_seen": 2208563200 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010650775633749528, + "loss": 3.3453, + "theoretical_loss": 3.4023117783678436, + "tokens_seen": 2209349632 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010631857737419599, + "loss": 3.3543, + "theoretical_loss": 3.4021798031876176, + "tokens_seen": 2210398208 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010612939841089671, + "loss": 3.441, + "theoretical_loss": 3.4020479081198034, + "tokens_seen": 2211446784 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.45996737480163574, + "objective/train/docs_used": 1246233, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2608132362365723, + "objective/train/original_loss": 3.2608132362365723, + "objective/train/theoretical_loss": 3.4019984681056785, + "objective/train/tokens_used": 2232300000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2254868596792221, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0471423864364624, + "objective/train/weighted_lm_loss": 3.418138265609741, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9537967443466187, + "theoretical_loss": 3.4019984681056785, + "tokens_seen": 2211840000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010594021944759744, + "loss": 3.4698, + "theoretical_loss": 3.4019160930778196, + "tokens_seen": 2212495360 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010575104048429815, + "loss": 3.5284, + "theoretical_loss": 3.401784357975218, + "tokens_seen": 2213543936 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010556186152099887, + "loss": 3.4935, + "theoretical_loss": 3.401652702725687, + "tokens_seen": 2214592512 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.48440462350845337, + "objective/train/docs_used": 1248119, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.49127459526062, + "objective/train/original_loss": 3.49127459526062, + "objective/train/theoretical_loss": 3.401586905018886, + "objective/train/tokens_used": 2235576800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23930440843105316, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496569871902466, + "objective/train/weighted_lm_loss": 3.665234327316284, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9591064453125, + "theoretical_loss": 3.401586905018886, + "tokens_seen": 2215116800 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010537268255769959, + "loss": 3.5465, + "theoretical_loss": 3.401521127243046, + "tokens_seen": 2215641088 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010518350359440031, + "loss": 3.4807, + "theoretical_loss": 3.4013896314412517, + "tokens_seen": 2216689664 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010499432463110102, + "loss": 3.387, + "theoretical_loss": 3.401258215234391, + "tokens_seen": 2217738240 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.4825243651866913, + "objective/train/docs_used": 1249212, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.276275634765625, + "objective/train/original_loss": 3.276275157928467, + "objective/train/theoretical_loss": 3.401176120486286, + "objective/train/tokens_used": 2238853600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2399078756570816, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494717359542847, + "objective/train/weighted_lm_loss": 3.437455415725708, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9523665308952332, + "theoretical_loss": 3.401176120486286, + "tokens_seen": 2218393600 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010480514566780175, + "loss": 3.495, + "theoretical_loss": 3.401126878536686, + "tokens_seen": 2218786816 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010461596670450246, + "loss": 3.3819, + "theoretical_loss": 3.400995621262491, + "tokens_seen": 2219835392 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010442678774120318, + "loss": 3.3977, + "theoretical_loss": 3.4008644433262933, + "tokens_seen": 2220883968 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.4869391620159149, + "objective/train/docs_used": 1251383, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7835676670074463, + "objective/train/original_loss": 2.7835679054260254, + "objective/train/theoretical_loss": 3.400766111888339, + "objective/train/tokens_used": 2242130400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24035805463790894, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499156713485718, + "objective/train/weighted_lm_loss": 2.92244815826416, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9542126655578613, + "theoretical_loss": 3.400766111888339, + "tokens_seen": 2221670400 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001042376087779039, + "loss": 3.3702, + "theoretical_loss": 3.400733344642712, + "tokens_seen": 2221932544 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010404842981460462, + "loss": 3.4368, + "theoretical_loss": 3.4006023251264987, + "tokens_seen": 2222981120 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010385925085130534, + "loss": 3.3566, + "theoretical_loss": 3.400471384692537, + "tokens_seen": 2224029696 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.4827156364917755, + "objective/train/docs_used": 1252992, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.140798807144165, + "objective/train/original_loss": 3.140798807144165, + "objective/train/theoretical_loss": 3.400356876618167, + "objective/train/tokens_used": 2245407200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23796583712100983, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494810342788696, + "objective/train/weighted_lm_loss": 3.2959017753601074, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9555644392967224, + "theoretical_loss": 3.400356876618167, + "tokens_seen": 2224947200 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010367007188800606, + "loss": 3.2769, + "theoretical_loss": 3.4003405232558417, + "tokens_seen": 2225078272 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010348089292470678, + "loss": 3.2955, + "theoretical_loss": 3.4002097407315595, + "tokens_seen": 2226126848 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010329171396140749, + "loss": 3.3102, + "theoretical_loss": 3.4000790370349674, + "tokens_seen": 2227175424 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.4734431505203247, + "objective/train/docs_used": 1254892, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.233670473098755, + "objective/train/original_loss": 3.233670473098755, + "objective/train/theoretical_loss": 3.3999484120814736, + "objective/train/tokens_used": 2248684000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23569948971271515, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485421419143677, + "objective/train/weighted_lm_loss": 3.390467405319214, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9522518515586853, + "theoretical_loss": 3.3999484120814736, + "tokens_seen": 2228224000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010310253499810822, + "loss": 3.3266, + "theoretical_loss": 3.3999484120814736, + "tokens_seen": 2228224000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010291335603480893, + "loss": 3.2585, + "theoretical_loss": 3.399817865786617, + "tokens_seen": 2229272576 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010272417707150965, + "loss": 3.2397, + "theoretical_loss": 3.399687398066067, + "tokens_seen": 2230321152 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010253499810821037, + "loss": 3.1565, + "theoretical_loss": 3.3995570088356217, + "tokens_seen": 2231369728 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.49023377895355225, + "objective/train/docs_used": 1257149, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8337886333465576, + "objective/train/original_loss": 2.8337888717651367, + "objective/train/theoretical_loss": 3.399540715696463, + "objective/train/tokens_used": 2251960800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2429656982421875, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502585172653198, + "objective/train/weighted_lm_loss": 2.9760193824768066, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9533647894859314, + "theoretical_loss": 3.399540715696463, + "tokens_seen": 2231500800 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010234581914491109, + "loss": 3.2025, + "theoretical_loss": 3.3994266980112107, + "tokens_seen": 2232418304 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001021566401816118, + "loss": 3.2688, + "theoretical_loss": 3.3992964655088915, + "tokens_seen": 2233466880 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010196746121831253, + "loss": 3.2317, + "theoretical_loss": 3.3991663112448522, + "tokens_seen": 2234515456 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.48362892866134644, + "objective/train/docs_used": 1259222, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9153990745544434, + "objective/train/original_loss": 2.9153990745544434, + "objective/train/theoretical_loss": 3.3991337848937637, + "objective/train/tokens_used": 2255237600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23721912503242493, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495686531066895, + "objective/train/weighted_lm_loss": 3.060173273086548, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.954319179058075, + "theoretical_loss": 3.3991337848937637, + "tokens_seen": 2234777600 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010177828225501324, + "loss": 3.1987, + "theoretical_loss": 3.3990362351354086, + "tokens_seen": 2235564032 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010158910329171396, + "loss": 3.154, + "theoretical_loss": 3.3989062370970062, + "tokens_seen": 2236612608 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001013999243284147, + "loss": 3.2095, + "theoretical_loss": 3.3987763170462184, + "tokens_seen": 2237661184 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.48375609517097473, + "objective/train/docs_used": 1261230, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0727992057800293, + "objective/train/original_loss": 3.0727992057800293, + "objective/train/theoretical_loss": 3.398727617116349, + "objective/train/tokens_used": 2258514400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23994581401348114, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495952367782593, + "objective/train/weighted_lm_loss": 3.225282907485962, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9582397937774658, + "theoretical_loss": 3.398727617116349, + "tokens_seen": 2238054400 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001012107453651154, + "loss": 3.152, + "theoretical_loss": 3.398646474899747, + "tokens_seen": 2238709760 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010102156640181612, + "loss": 3.1287, + "theoretical_loss": 3.398516710574422, + "tokens_seen": 2239758336 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010083238743851684, + "loss": 3.1316, + "theoretical_loss": 3.3983870239872003, + "tokens_seen": 2240806912 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.48984038829803467, + "objective/train/docs_used": 1263119, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9827420711517334, + "objective/train/original_loss": 2.9827423095703125, + "objective/train/theoretical_loss": 3.398322209819462, + "objective/train/tokens_used": 2261791200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24176624417304993, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502128601074219, + "objective/train/weighted_lm_loss": 3.1325385570526123, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9521621465682983, + "theoretical_loss": 3.398322209819462, + "tokens_seen": 2241331200 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010064320847521756, + "loss": 3.14, + "theoretical_loss": 3.3982574150551663, + "tokens_seen": 2241855488 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010045402951191827, + "loss": 3.125, + "theoretical_loss": 3.3981278836955333, + "tokens_seen": 2242904064 + }, + { + "epoch": 0.8, + "learning_rate": 0.000100264850548619, + "loss": 3.1404, + "theoretical_loss": 3.397998429825639, + "tokens_seen": 2243952640 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.4860360622406006, + "objective/train/docs_used": 1265240, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.894569158554077, + "objective/train/original_loss": 2.8945693969726562, + "objective/train/theoretical_loss": 3.397917560470535, + "objective/train/tokens_used": 2265068000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2396802008152008, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498219728469849, + "objective/train/weighted_lm_loss": 3.038137435913086, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9669562578201294, + "theoretical_loss": 3.397917560470535, + "tokens_seen": 2244608000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010007567158531971, + "loss": 3.1545, + "theoretical_loss": 3.397869053362949, + "tokens_seen": 2245001216 + }, + { + "epoch": 0.8, + "learning_rate": 9.988649262202043e-05, + "loss": 3.0972, + "theoretical_loss": 3.3977397542250563, + "tokens_seen": 2246049792 + }, + { + "epoch": 0.8, + "learning_rate": 9.969731365872115e-05, + "loss": 3.0479, + "theoretical_loss": 3.3976105323296775, + "tokens_seen": 2247098368 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.4735538065433502, + "objective/train/docs_used": 1267114, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.910813093185425, + "objective/train/original_loss": 2.9108128547668457, + "objective/train/theoretical_loss": 3.3975136665491172, + "objective/train/tokens_used": 2268344800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24066385626792908, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485782623291016, + "objective/train/weighted_lm_loss": 3.0512495040893555, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9516804814338684, + "theoretical_loss": 3.3975136665491172, + "tokens_seen": 2247884800 + }, + { + "epoch": 0.8, + "learning_rate": 9.950813469542187e-05, + "loss": 3.0724, + "theoretical_loss": 3.3974813875946577, + "tokens_seen": 2248146944 + }, + { + "epoch": 0.8, + "learning_rate": 9.93189557321226e-05, + "loss": 3.0347, + "theoretical_loss": 3.3973523199379656, + "tokens_seen": 2249195520 + }, + { + "epoch": 0.8, + "learning_rate": 9.912977676882331e-05, + "loss": 3.0484, + "theoretical_loss": 3.397223329277697, + "tokens_seen": 2250244096 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.48258739709854126, + "objective/train/docs_used": 1269266, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.058069944381714, + "objective/train/original_loss": 3.058070182800293, + "objective/train/theoretical_loss": 3.3971105255467977, + "objective/train/tokens_used": 2271621600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24109937250614166, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049484133720398, + "objective/train/weighted_lm_loss": 3.208803415298462, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9516813158988953, + "theoretical_loss": 3.3971105255467977, + "tokens_seen": 2251161600 + }, + { + "epoch": 0.8, + "learning_rate": 9.894059780552403e-05, + "loss": 3.101, + "theoretical_loss": 3.397094415532072, + "tokens_seen": 2251292672 + }, + { + "epoch": 0.8, + "learning_rate": 9.875141884222474e-05, + "loss": 3.0379, + "theoretical_loss": 3.396965578619435, + "tokens_seen": 2252341248 + }, + { + "epoch": 0.8, + "learning_rate": 9.856223987892548e-05, + "loss": 3.1686, + "theoretical_loss": 3.3968368184582562, + "tokens_seen": 2253389824 + }, + { + "epoch": 0.8, + "objective/train/advantage_avg": 0.48647889494895935, + "objective/train/docs_used": 1270683, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8880631923675537, + "objective/train/original_loss": 2.8880629539489746, + "objective/train/theoretical_loss": 3.39670813496713, + "objective/train/tokens_used": 2274898400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2403435856103897, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498695373535156, + "objective/train/weighted_lm_loss": 3.032536268234253, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9517598152160645, + "theoretical_loss": 3.39670813496713, + "tokens_seen": 2254438400 + }, + { + "epoch": 0.81, + "learning_rate": 9.837306091562618e-05, + "loss": 3.0102, + "theoretical_loss": 3.39670813496713, + "tokens_seen": 2254438400 + }, + { + "epoch": 0.81, + "learning_rate": 9.81838819523269e-05, + "loss": 3.0983, + "theoretical_loss": 3.396579528064774, + "tokens_seen": 2255486976 + }, + { + "epoch": 0.81, + "learning_rate": 9.799470298902762e-05, + "loss": 3.0334, + "theoretical_loss": 3.396450997670031, + "tokens_seen": 2256535552 + }, + { + "epoch": 0.81, + "learning_rate": 9.780552402572834e-05, + "loss": 3.1238, + "theoretical_loss": 3.3963225437018663, + "tokens_seen": 2257584128 + }, + { + "epoch": 0.81, + "objective/train/advantage_avg": 0.4851101040840149, + "objective/train/docs_used": 1272388, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.497123956680298, + "objective/train/original_loss": 2.497124195098877, + "objective/train/theoretical_loss": 3.3963064923255586, + "objective/train/tokens_used": 2278175200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2395034283399582, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049728274345398, + "objective/train/weighted_lm_loss": 2.621992588043213, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9556369185447693, + "theoretical_loss": 3.3963064923255586, + "tokens_seen": 2257715200 + }, + { + "epoch": 0.81, + "learning_rate": 9.761634506242905e-05, + "loss": 3.0345, + "theoretical_loss": 3.3961941660793697, + "tokens_seen": 2258632704 + }, + { + "epoch": 0.81, + "learning_rate": 9.742716609912979e-05, + "loss": 3.095, + "theoretical_loss": 3.3960658647217534, + "tokens_seen": 2259681280 + }, + { + "epoch": 0.81, + "learning_rate": 9.72379871358305e-05, + "loss": 3.0746, + "theoretical_loss": 3.3959376395483525, + "tokens_seen": 2260729856 + }, + { + "epoch": 0.81, + "objective/train/advantage_avg": 0.443075954914093, + "objective/train/docs_used": 1274638, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.932318925857544, + "objective/train/original_loss": 2.932318687438965, + "objective/train/theoretical_loss": 3.395905595149345, + "objective/train/tokens_used": 2281452000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23310764133930206, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.045491099357605, + "objective/train/weighted_lm_loss": 3.069843292236328, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9517802000045776, + "theoretical_loss": 3.395905595149345, + "tokens_seen": 2260992000 + }, + { + "epoch": 0.81, + "learning_rate": 9.704880817253121e-05, + "loss": 3.0252, + "theoretical_loss": 3.3958094904786256, + "tokens_seen": 2261778432 + }, + { + "epoch": 0.81, + "learning_rate": 9.685962920923195e-05, + "loss": 3.0028, + "theoretical_loss": 3.3956814174321526, + "tokens_seen": 2262827008 + }, + { + "epoch": 0.81, + "learning_rate": 9.667045024593265e-05, + "loss": 3.0742, + "theoretical_loss": 3.3955534203286364, + "tokens_seen": 2263875584 + }, + { + "epoch": 0.81, + "objective/train/advantage_avg": 0.4872230589389801, + "objective/train/docs_used": 1276376, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.892059326171875, + "objective/train/original_loss": 2.892059326171875, + "objective/train/theoretical_loss": 3.3955054409774936, + "objective/train/tokens_used": 2284728800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2388237714767456, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499361753463745, + "objective/train/weighted_lm_loss": 3.0363094806671143, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.979783833026886, + "theoretical_loss": 3.3955054409774936, + "tokens_seen": 2264268800 + }, + { + "epoch": 0.81, + "learning_rate": 9.648127128263337e-05, + "loss": 3.0924, + "theoretical_loss": 3.395425499087902, + "tokens_seen": 2264924160 + }, + { + "epoch": 0.81, + "learning_rate": 9.62920923193341e-05, + "loss": 2.9934, + "theoretical_loss": 3.395297653629895, + "tokens_seen": 2265972736 + }, + { + "epoch": 0.81, + "learning_rate": 9.610291335603482e-05, + "loss": 3.1318, + "theoretical_loss": 3.3951698838746838, + "tokens_seen": 2267021312 + }, + { + "epoch": 0.81, + "objective/train/advantage_avg": 0.4725620746612549, + "objective/train/docs_used": 1278407, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.076742649078369, + "objective/train/original_loss": 3.076742649078369, + "objective/train/theoretical_loss": 3.3951060273606806, + "objective/train/tokens_used": 2288005600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23542000353336334, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0484524965286255, + "objective/train/weighted_lm_loss": 3.224698066711426, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9519801139831543, + "theoretical_loss": 3.3951060273606806, + "tokens_seen": 2267545600 + }, + { + "epoch": 0.81, + "learning_rate": 9.591373439273552e-05, + "loss": 3.0248, + "theoretical_loss": 3.395042189742457, + "tokens_seen": 2268069888 + }, + { + "epoch": 0.81, + "learning_rate": 9.572455542943626e-05, + "loss": 3.0607, + "theoretical_loss": 3.394914571153525, + "tokens_seen": 2269118464 + }, + { + "epoch": 0.81, + "learning_rate": 9.553537646613696e-05, + "loss": 3.0339, + "theoretical_loss": 3.3947870280283183, + "tokens_seen": 2270167040 + }, + { + "epoch": 0.81, + "objective/train/advantage_avg": 0.4836905300617218, + "objective/train/docs_used": 1280391, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7354981899261475, + "objective/train/original_loss": 2.7354984283447266, + "objective/train/theoretical_loss": 3.39470735186118, + "objective/train/tokens_used": 2291282400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23765282332897186, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049576997756958, + "objective/train/weighted_lm_loss": 2.871483325958252, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9534169435501099, + "theoretical_loss": 3.39470735186118, + "tokens_seen": 2270822400 + }, + { + "epoch": 0.81, + "learning_rate": 9.534619750283768e-05, + "loss": 3.0532, + "theoretical_loss": 3.3946595602873884, + "tokens_seen": 2271215616 + }, + { + "epoch": 0.81, + "learning_rate": 9.515701853953842e-05, + "loss": 3.1007, + "theoretical_loss": 3.3945321678514064, + "tokens_seen": 2272264192 + }, + { + "epoch": 0.81, + "learning_rate": 9.496783957623913e-05, + "loss": 3.0615, + "theoretical_loss": 3.394404850641165, + "tokens_seen": 2273312768 + }, + { + "epoch": 0.81, + "objective/train/advantage_avg": 0.48565566539764404, + "objective/train/docs_used": 1282312, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.136152744293213, + "objective/train/original_loss": 3.1361522674560547, + "objective/train/theoretical_loss": 3.3943094120527944, + "objective/train/tokens_used": 2294559200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23977386951446533, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497844219207764, + "objective/train/weighted_lm_loss": 3.292649984359741, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9598386883735657, + "theoretical_loss": 3.3943094120527944, + "tokens_seen": 2274099200 + }, + { + "epoch": 0.81, + "learning_rate": 9.477866061293985e-05, + "loss": 3.1308, + "theoretical_loss": 3.3942776085775743, + "tokens_seen": 2274361344 + }, + { + "epoch": 0.81, + "learning_rate": 9.458948164964057e-05, + "loss": 3.0992, + "theoretical_loss": 3.394150441581666, + "tokens_seen": 2275409920 + }, + { + "epoch": 0.81, + "learning_rate": 9.440030268634129e-05, + "loss": 3.0643, + "theoretical_loss": 3.3940233495745904, + "tokens_seen": 2276458496 + }, + { + "epoch": 0.81, + "objective/train/advantage_avg": 0.48642486333847046, + "objective/train/docs_used": 1284150, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.593719959259033, + "objective/train/original_loss": 2.593719959259033, + "objective/train/theoretical_loss": 3.3939122055207807, + "objective/train/tokens_used": 2297836000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24082650244235992, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498665571212769, + "objective/train/weighted_lm_loss": 2.723041534423828, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9529795050621033, + "theoretical_loss": 3.3939122055207807, + "tokens_seen": 2277376000 + }, + { + "epoch": 0.81, + "learning_rate": 9.4211123723042e-05, + "loss": 3.0231, + "theoretical_loss": 3.393896332477617, + "tokens_seen": 2277507072 + }, + { + "epoch": 0.81, + "learning_rate": 9.402194475974273e-05, + "loss": 3.0623, + "theoretical_loss": 3.3937693902121335, + "tokens_seen": 2278555648 + }, + { + "epoch": 0.81, + "learning_rate": 9.383276579644344e-05, + "loss": 3.0839, + "theoretical_loss": 3.393642522699647, + "tokens_seen": 2279604224 + }, + { + "epoch": 0.81, + "objective/train/advantage_avg": 0.48222169280052185, + "objective/train/docs_used": 1285842, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.761013984680176, + "objective/train/original_loss": 2.761013984680176, + "objective/train/theoretical_loss": 3.393515729861783, + "objective/train/tokens_used": 2301112800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2392144352197647, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494379997253418, + "objective/train/weighted_lm_loss": 2.8989064693450928, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9535624980926514, + "theoretical_loss": 3.393515729861783, + "tokens_seen": 2280652800 + }, + { + "epoch": 0.81, + "learning_rate": 9.364358683314416e-05, + "loss": 3.0902, + "theoretical_loss": 3.393515729861783, + "tokens_seen": 2280652800 + }, + { + "epoch": 0.81, + "learning_rate": 9.345440786984488e-05, + "loss": 3.111, + "theoretical_loss": 3.3933890116202843, + "tokens_seen": 2281701376 + }, + { + "epoch": 0.82, + "learning_rate": 9.32652289065456e-05, + "loss": 3.1021, + "theoretical_loss": 3.3932623678970133, + "tokens_seen": 2282749952 + }, + { + "epoch": 0.82, + "learning_rate": 9.30760499432463e-05, + "loss": 3.0894, + "theoretical_loss": 3.393135798613948, + "tokens_seen": 2283798528 + }, + { + "epoch": 0.82, + "objective/train/advantage_avg": 0.48043379187583923, + "objective/train/docs_used": 1287535, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0948588848114014, + "objective/train/original_loss": 3.0948591232299805, + "objective/train/theoretical_loss": 3.3931199826837606, + "objective/train/tokens_used": 2304389600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23888222873210907, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049257516860962, + "objective/train/weighted_lm_loss": 3.246586799621582, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9514316320419312, + "theoretical_loss": 3.3931199826837606, + "tokens_seen": 2283929600 + }, + { + "epoch": 0.82, + "learning_rate": 9.288687097994704e-05, + "loss": 3.1324, + "theoretical_loss": 3.3930093036931854, + "tokens_seen": 2284847104 + }, + { + "epoch": 0.82, + "learning_rate": 9.269769201664776e-05, + "loss": 3.0722, + "theoretical_loss": 3.392882883056939, + "tokens_seen": 2285895680 + }, + { + "epoch": 0.82, + "learning_rate": 9.250851305334847e-05, + "loss": 3.0668, + "theoretical_loss": 3.39275653662754, + "tokens_seen": 2286944256 + }, + { + "epoch": 0.82, + "objective/train/advantage_avg": 0.48825782537460327, + "objective/train/docs_used": 1288943, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.927969217300415, + "objective/train/original_loss": 2.927968978881836, + "objective/train/theoretical_loss": 3.392724961605919, + "objective/train/tokens_used": 2307666400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24235625565052032, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500577688217163, + "objective/train/weighted_lm_loss": 3.073913335800171, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.958259642124176, + "theoretical_loss": 3.392724961605919, + "tokens_seen": 2287206400 + }, + { + "epoch": 0.82, + "learning_rate": 9.23193340900492e-05, + "loss": 3.1182, + "theoretical_loss": 3.3926302643274355, + "tokens_seen": 2287992832 + }, + { + "epoch": 0.82, + "learning_rate": 9.21301551267499e-05, + "loss": 3.0441, + "theoretical_loss": 3.39250406607919, + "tokens_seen": 2289041408 + }, + { + "epoch": 0.82, + "learning_rate": 9.194097616345063e-05, + "loss": 3.1382, + "theoretical_loss": 3.3923779418054827, + "tokens_seen": 2290089984 + }, + { + "epoch": 0.82, + "objective/train/advantage_avg": 0.48361656069755554, + "objective/train/docs_used": 1290666, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5616543292999268, + "objective/train/original_loss": 2.561654567718506, + "objective/train/theoretical_loss": 3.392330664258642, + "objective/train/tokens_used": 2310943200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23792539536952972, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495710372924805, + "objective/train/weighted_lm_loss": 2.6900863647460938, + "objective/train/weights_max": 1.0512185096740723, + "objective/train/weights_min": 0.9807561635971069, + "theoretical_loss": 3.392330664258642, + "tokens_seen": 2290483200 + }, + { + "epoch": 0.82, + "learning_rate": 9.175179720015135e-05, + "loss": 3.1281, + "theoretical_loss": 3.3922518914291113, + "tokens_seen": 2291138560 + }, + { + "epoch": 0.82, + "learning_rate": 9.156261823685207e-05, + "loss": 3.1114, + "theoretical_loss": 3.3921259148729876, + "tokens_seen": 2292187136 + }, + { + "epoch": 0.82, + "learning_rate": 9.137343927355278e-05, + "loss": 3.0929, + "theoretical_loss": 3.39200001206014, + "tokens_seen": 2293235712 + }, + { + "debugging/Self-BLEU-5": 0.515096219338814, + "debugging/distinct-1-grams": 0.7923118197529954, + "debugging/distinct-2-grams": 0.9542743672238616, + "debugging/entropy-1-grams": 6.147062465574793, + "debugging/entropy-2-grams": 7.107820582543805, + "debugging/length": 504.1666666666667, + "debugging/num_segments": 18, + "debugging/raw_token_scores_avg": 0.012544393539428711, + "debugging/raw_token_scores_std": 0.04549340158700943, + "epoch": 0.82, + "objective/train/advantage_avg": 0.48745694756507874, + "objective/train/docs_used": 1292636, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8265254497528076, + "objective/train/original_loss": 2.8265256881713867, + "objective/train/theoretical_loss": 3.3919370882834223, + "objective/train/tokens_used": 2314220000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23968477547168732, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499640703201294, + "objective/train/weighted_lm_loss": 2.9688053131103516, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9514824151992798, + "theoretical_loss": 3.3919370882834223, + "tokens_seen": 2293760000 + }, + { + "epoch": 0.82, + "learning_rate": 9.118426031025351e-05, + "loss": 3.0551, + "theoretical_loss": 3.3918741829137113, + "tokens_seen": 2294284288 + }, + { + "epoch": 0.82, + "learning_rate": 9.099508134695422e-05, + "loss": 3.1223, + "theoretical_loss": 3.3917484273569602, + "tokens_seen": 2295332864 + }, + { + "epoch": 0.82, + "learning_rate": 9.080590238365494e-05, + "loss": 3.1222, + "theoretical_loss": 3.39162274531326, + "tokens_seen": 2296381440 + }, + { + "epoch": 0.82, + "objective/train/advantage_avg": 0.48345085978507996, + "objective/train/docs_used": 1294107, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.764035940170288, + "objective/train/original_loss": 2.764035701751709, + "objective/train/theoretical_loss": 3.391544231332792, + "objective/train/tokens_used": 2317496800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23902611434459686, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495600700378418, + "objective/train/weighted_lm_loss": 2.902003049850464, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9564229846000671, + "theoretical_loss": 3.391544231332792, + "tokens_seen": 2297036800 + }, + { + "epoch": 0.82, + "learning_rate": 9.061672342035567e-05, + "loss": 3.1565, + "theoretical_loss": 3.391497136706099, + "tokens_seen": 2297430016 + }, + { + "epoch": 0.82, + "learning_rate": 9.042754445705638e-05, + "loss": 3.16, + "theoretical_loss": 3.3913716014590807, + "tokens_seen": 2298478592 + }, + { + "epoch": 0.82, + "learning_rate": 9.02383654937571e-05, + "loss": 3.2324, + "theoretical_loss": 3.3912461394959212, + "tokens_seen": 2299527168 + }, + { + "epoch": 0.82, + "objective/train/advantage_avg": 0.4791503846645355, + "objective/train/docs_used": 1294624, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1768290996551514, + "objective/train/original_loss": 3.1768290996551514, + "objective/train/theoretical_loss": 3.3911520910702593, + "objective/train/tokens_used": 2320773600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23488926887512207, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049108862876892, + "objective/train/weighted_lm_loss": 3.3320627212524414, + "objective/train/weights_max": 1.0512152910232544, + "objective/train/weights_min": 0.962348997592926, + "theoretical_loss": 3.3911520910702593, + "tokens_seen": 2300313600 + }, + { + "epoch": 0.82, + "learning_rate": 9.004918653045782e-05, + "loss": 3.2351, + "theoretical_loss": 3.391120750740452, + "tokens_seen": 2300575744 + }, + { + "epoch": 0.82, + "learning_rate": 8.986000756715854e-05, + "loss": 3.2923, + "theoretical_loss": 3.3909954351166176, + "tokens_seen": 2301624320 + }, + { + "epoch": 0.82, + "learning_rate": 8.967082860385925e-05, + "loss": 3.2683, + "theoretical_loss": 3.3908701925484768, + "tokens_seen": 2302672896 + }, + { + "epoch": 0.82, + "objective/train/advantage_avg": 0.45928680896759033, + "objective/train/docs_used": 1296468, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9212799072265625, + "objective/train/original_loss": 2.9212799072265625, + "objective/train/theoretical_loss": 3.390760665170238, + "objective/train/tokens_used": 2324050400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2240046262741089, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0470670461654663, + "objective/train/weighted_lm_loss": 3.061581611633301, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9757957458496094, + "theoretical_loss": 3.390760665170238, + "tokens_seen": 2303590400 + }, + { + "epoch": 0.82, + "learning_rate": 8.948164964055998e-05, + "loss": 3.2527, + "theoretical_loss": 3.3907450229602016, + "tokens_seen": 2303721472 + }, + { + "epoch": 0.82, + "learning_rate": 8.929247067726069e-05, + "loss": 3.2998, + "theoretical_loss": 3.390619926276077, + "tokens_seen": 2304770048 + }, + { + "epoch": 0.82, + "learning_rate": 8.910329171396141e-05, + "loss": 3.2132, + "theoretical_loss": 3.390494902420501, + "tokens_seen": 2305818624 + }, + { + "epoch": 0.82, + "objective/train/advantage_avg": 0.48398545384407043, + "objective/train/docs_used": 1298252, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6797232627868652, + "objective/train/original_loss": 2.6797232627868652, + "objective/train/theoretical_loss": 3.390369951317984, + "objective/train/tokens_used": 2327327200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24018828570842743, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049619436264038, + "objective/train/weighted_lm_loss": 2.812175989151001, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9517936706542969, + "theoretical_loss": 3.390369951317984, + "tokens_seen": 2306867200 + }, + { + "epoch": 0.82, + "learning_rate": 8.891411275066213e-05, + "loss": 3.2114, + "theoretical_loss": 3.390369951317984, + "tokens_seen": 2306867200 + }, + { + "epoch": 0.82, + "learning_rate": 8.872493378736285e-05, + "loss": 3.2249, + "theoretical_loss": 3.3902450728931504, + "tokens_seen": 2307915776 + }, + { + "epoch": 0.82, + "learning_rate": 8.853575482406357e-05, + "loss": 3.179, + "theoretical_loss": 3.390120267070735, + "tokens_seen": 2308964352 + }, + { + "epoch": 0.83, + "learning_rate": 8.834657586076429e-05, + "loss": 3.167, + "theoretical_loss": 3.3899955337755854, + "tokens_seen": 2310012928 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.46928122639656067, + "objective/train/docs_used": 1299979, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0463294982910156, + "objective/train/original_loss": 3.0463294982910156, + "objective/train/theoretical_loss": 3.3899799472095267, + "objective/train/tokens_used": 2330604000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23485510051250458, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481215715408325, + "objective/train/weighted_lm_loss": 3.1912734508514404, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9515340924263, + "theoretical_loss": 3.3899799472095267, + "tokens_seen": 2310144000 + }, + { + "epoch": 0.83, + "learning_rate": 8.815739689746501e-05, + "loss": 3.2139, + "theoretical_loss": 3.3898708729326614, + "tokens_seen": 2311061504 + }, + { + "epoch": 0.83, + "learning_rate": 8.796821793416572e-05, + "loss": 3.2259, + "theoretical_loss": 3.3897462844670345, + "tokens_seen": 2312110080 + }, + { + "epoch": 0.83, + "learning_rate": 8.777903897086645e-05, + "loss": 3.2116, + "theoretical_loss": 3.3896217683038863, + "tokens_seen": 2313158656 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.4831146001815796, + "objective/train/docs_used": 1302096, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2344326972961426, + "objective/train/original_loss": 3.2344326972961426, + "objective/train/theoretical_loss": 3.3895906505516047, + "objective/train/tokens_used": 2333880800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2407372146844864, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495350360870361, + "objective/train/weighted_lm_loss": 3.3936874866485596, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9515005946159363, + "theoretical_loss": 3.3895906505516047, + "tokens_seen": 2313420800 + }, + { + "epoch": 0.83, + "learning_rate": 8.758986000756716e-05, + "loss": 3.1729, + "theoretical_loss": 3.3894973243685116, + "tokens_seen": 2314207232 + }, + { + "epoch": 0.83, + "learning_rate": 8.740068104426788e-05, + "loss": 3.1754, + "theoretical_loss": 3.389372952586315, + "tokens_seen": 2315255808 + }, + { + "epoch": 0.83, + "learning_rate": 8.72115020809686e-05, + "loss": 3.1716, + "theoretical_loss": 3.3892486528828116, + "tokens_seen": 2316304384 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.47961023449897766, + "objective/train/docs_used": 1304213, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.84677791595459, + "objective/train/original_loss": 2.846778392791748, + "objective/train/theoretical_loss": 3.3892020590616028, + "objective/train/tokens_used": 2337157600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24148398637771606, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049188494682312, + "objective/train/weighted_lm_loss": 2.9851181507110596, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.951810896396637, + "theoretical_loss": 3.3892020590616028, + "tokens_seen": 2316697600 + }, + { + "epoch": 0.83, + "learning_rate": 8.702232311766932e-05, + "loss": 3.0908, + "theoretical_loss": 3.389124425183628, + "tokens_seen": 2317352960 + }, + { + "epoch": 0.83, + "learning_rate": 8.683314415437003e-05, + "loss": 3.0297, + "theoretical_loss": 3.3890002694145007, + "tokens_seen": 2318401536 + }, + { + "epoch": 0.83, + "learning_rate": 8.664396519107076e-05, + "loss": 3.1335, + "theoretical_loss": 3.388876185501276, + "tokens_seen": 2319450112 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.48336800932884216, + "objective/train/docs_used": 1306171, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2488811016082764, + "objective/train/original_loss": 3.2488808631896973, + "objective/train/theoretical_loss": 3.388814170467484, + "objective/train/tokens_used": 2340434400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23837092518806458, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495483875274658, + "objective/train/weighted_lm_loss": 3.409419298171997, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9514601826667786, + "theoretical_loss": 3.388814170467484, + "tokens_seen": 2319974400 + }, + { + "epoch": 0.83, + "learning_rate": 8.645478622777148e-05, + "loss": 3.1275, + "theoretical_loss": 3.3887521733699106, + "tokens_seen": 2320498688 + }, + { + "epoch": 0.83, + "learning_rate": 8.626560726447219e-05, + "loss": 3.1834, + "theoretical_loss": 3.388628232946471, + "tokens_seen": 2321547264 + }, + { + "epoch": 0.83, + "learning_rate": 8.607642830117292e-05, + "loss": 3.1321, + "theoretical_loss": 3.388504364157133, + "tokens_seen": 2322595840 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.472857803106308, + "objective/train/docs_used": 1308132, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8800430297851562, + "objective/train/original_loss": 2.8800430297851562, + "objective/train/theoretical_loss": 3.3884269825077302, + "objective/train/tokens_used": 2343711200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23990888893604279, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485048294067383, + "objective/train/weighted_lm_loss": 3.0166618824005127, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9515467882156372, + "theoretical_loss": 3.3884269825077302, + "tokens_seen": 2323251200 + }, + { + "epoch": 0.83, + "learning_rate": 8.588724933787363e-05, + "loss": 3.1415, + "theoretical_loss": 3.3883805669281815, + "tokens_seen": 2323644416 + }, + { + "epoch": 0.83, + "learning_rate": 8.569807037457435e-05, + "loss": 3.1349, + "theoretical_loss": 3.388256841186011, + "tokens_seen": 2324692992 + }, + { + "epoch": 0.83, + "learning_rate": 8.550889141127507e-05, + "loss": 3.0799, + "theoretical_loss": 3.3881331868571234, + "tokens_seen": 2325741568 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.4859318435192108, + "objective/train/docs_used": 1310381, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.184096097946167, + "objective/train/original_loss": 3.184096336364746, + "objective/train/theoretical_loss": 3.3880404929312737, + "objective/train/tokens_used": 2346988000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23933574557304382, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498096942901611, + "objective/train/weighted_lm_loss": 3.3434245586395264, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.96250981092453, + "theoretical_loss": 3.3880404929312737, + "tokens_seen": 2326528000 + }, + { + "epoch": 0.83, + "learning_rate": 8.531971244797579e-05, + "loss": 3.1258, + "theoretical_loss": 3.3880096038681313, + "tokens_seen": 2326790144 + }, + { + "epoch": 0.83, + "learning_rate": 8.51305334846765e-05, + "loss": 3.1036, + "theoretical_loss": 3.387886092145755, + "tokens_seen": 2327838720 + }, + { + "epoch": 0.83, + "learning_rate": 8.494135452137723e-05, + "loss": 3.0237, + "theoretical_loss": 3.387762651616822, + "tokens_seen": 2328887296 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.48883962631225586, + "objective/train/docs_used": 1312090, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.894986152648926, + "objective/train/original_loss": 2.894986152648926, + "objective/train/theoretical_loss": 3.3876546994974377, + "objective/train/tokens_used": 2350264800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24219678342342377, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501152276992798, + "objective/train/weighted_lm_loss": 3.03971266746521, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9689180850982666, + "theoretical_loss": 3.3876546994974377, + "tokens_seen": 2329804800 + }, + { + "epoch": 0.83, + "learning_rate": 8.475217555807794e-05, + "loss": 3.1501, + "theoretical_loss": 3.3876392822082697, + "tokens_seen": 2329935872 + }, + { + "epoch": 0.83, + "learning_rate": 8.456299659477866e-05, + "loss": 3.1006, + "theoretical_loss": 3.3875159838471416, + "tokens_seen": 2330984448 + }, + { + "epoch": 0.83, + "learning_rate": 8.43738176314794e-05, + "loss": 3.0854, + "theoretical_loss": 3.3873927564605895, + "tokens_seen": 2332033024 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.49127140641212463, + "objective/train/docs_used": 1313833, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1920430660247803, + "objective/train/original_loss": 3.1920433044433594, + "objective/train/theoretical_loss": 3.3872695999758733, + "objective/train/tokens_used": 2353541600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24283026158809662, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503616333007812, + "objective/train/weighted_lm_loss": 3.3526406288146973, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9671012163162231, + "theoretical_loss": 3.3872695999758733, + "tokens_seen": 2333081600 + }, + { + "epoch": 0.83, + "learning_rate": 8.41846386681801e-05, + "loss": 3.0824, + "theoretical_loss": 3.3872695999758733, + "tokens_seen": 2333081600 + }, + { + "epoch": 0.83, + "learning_rate": 8.399545970488082e-05, + "loss": 3.1084, + "theoretical_loss": 3.3871465143203583, + "tokens_seen": 2334130176 + }, + { + "epoch": 0.83, + "learning_rate": 8.380628074158154e-05, + "loss": 3.0542, + "theoretical_loss": 3.387023499421519, + "tokens_seen": 2335178752 + }, + { + "epoch": 0.83, + "learning_rate": 8.361710177828226e-05, + "loss": 3.1007, + "theoretical_loss": 3.386900555206935, + "tokens_seen": 2336227328 + }, + { + "epoch": 0.83, + "objective/train/advantage_avg": 0.47514820098876953, + "objective/train/docs_used": 1316188, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9037365913391113, + "objective/train/original_loss": 2.9037368297576904, + "objective/train/theoretical_loss": 3.3868851921464964, + "objective/train/tokens_used": 2356818400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23783263564109802, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048723578453064, + "objective/train/weighted_lm_loss": 3.045696496963501, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9521853923797607, + "theoretical_loss": 3.3868851921464964, + "tokens_seen": 2336358400 + }, + { + "epoch": 0.83, + "learning_rate": 8.342792281498297e-05, + "loss": 3.1275, + "theoretical_loss": 3.3867776816042934, + "tokens_seen": 2337275904 + }, + { + "epoch": 0.84, + "learning_rate": 8.323874385168369e-05, + "loss": 3.081, + "theoretical_loss": 3.3866548785413872, + "tokens_seen": 2338324480 + }, + { + "epoch": 0.84, + "learning_rate": 8.304956488838441e-05, + "loss": 3.0834, + "theoretical_loss": 3.3865321459461155, + "tokens_seen": 2339373056 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.48149341344833374, + "objective/train/docs_used": 1317929, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8814351558685303, + "objective/train/original_loss": 2.8814353942871094, + "objective/train/theoretical_loss": 3.3865014737994263, + "objective/train/tokens_used": 2360095200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23636046051979065, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493507385253906, + "objective/train/weighted_lm_loss": 3.022583484649658, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9719198942184448, + "theoretical_loss": 3.3865014737994263, + "tokens_seen": 2339635200 + }, + { + "epoch": 0.84, + "learning_rate": 8.286038592508513e-05, + "loss": 3.0261, + "theoretical_loss": 3.386409483746484, + "tokens_seen": 2340421632 + }, + { + "epoch": 0.84, + "learning_rate": 8.267120696178584e-05, + "loss": 2.9999, + "theoretical_loss": 3.386286891870604, + "tokens_seen": 2341470208 + }, + { + "epoch": 0.84, + "learning_rate": 8.248202799848657e-05, + "loss": 3.0883, + "theoretical_loss": 3.386164370246692, + "tokens_seen": 2342518784 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.492958128452301, + "objective/train/docs_used": 1319369, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9171993732452393, + "objective/train/original_loss": 2.9171996116638184, + "objective/train/theoretical_loss": 3.386118442734927, + "objective/train/tokens_used": 2363372000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2451806366443634, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505422353744507, + "objective/train/weighted_lm_loss": 3.063903570175171, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9514467716217041, + "theoretical_loss": 3.386118442734927, + "tokens_seen": 2342912000 + }, + { + "epoch": 0.84, + "learning_rate": 8.229284903518728e-05, + "loss": 3.2241, + "theoretical_loss": 3.3860419188030693, + "tokens_seen": 2343567360 + }, + { + "epoch": 0.84, + "learning_rate": 8.2103670071888e-05, + "loss": 3.1287, + "theoretical_loss": 3.3859195374681637, + "tokens_seen": 2344615936 + }, + { + "epoch": 0.84, + "learning_rate": 8.191449110858873e-05, + "loss": 3.0803, + "theoretical_loss": 3.3857972261705074, + "tokens_seen": 2345664512 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.483732134103775, + "objective/train/docs_used": 1321381, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9508039951324463, + "objective/train/original_loss": 2.9508039951324463, + "objective/train/theoretical_loss": 3.3857360967633428, + "objective/train/tokens_used": 2366648800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24197065830230713, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496031045913696, + "objective/train/weighted_lm_loss": 3.095860719680786, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9532303214073181, + "theoretical_loss": 3.3857360967633428, + "tokens_seen": 2346188800 + }, + { + "epoch": 0.84, + "learning_rate": 8.172531214528944e-05, + "loss": 3.0983, + "theoretical_loss": 3.385674984838737, + "tokens_seen": 2346713088 + }, + { + "epoch": 0.84, + "learning_rate": 8.153613318199016e-05, + "loss": 3.0987, + "theoretical_loss": 3.3855528134015946, + "tokens_seen": 2347761664 + }, + { + "epoch": 0.84, + "learning_rate": 8.134695421869088e-05, + "loss": 3.1127, + "theoretical_loss": 3.385430711787925, + "tokens_seen": 2348810240 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.4847314953804016, + "objective/train/docs_used": 1323513, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6906464099884033, + "objective/train/original_loss": 2.690646171569824, + "objective/train/theoretical_loss": 3.38535443370504, + "objective/train/tokens_used": 2369925600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24226725101470947, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497045516967773, + "objective/train/weighted_lm_loss": 2.8229305744171143, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9514601826667786, + "theoretical_loss": 3.38535443370504, + "tokens_seen": 2349465600 + }, + { + "epoch": 0.84, + "learning_rate": 8.11577752553916e-05, + "loss": 3.0166, + "theoretical_loss": 3.3853086799266787, + "tokens_seen": 2349858816 + }, + { + "epoch": 0.84, + "learning_rate": 8.096859629209231e-05, + "loss": 3.0753, + "theoretical_loss": 3.38518671774691, + "tokens_seen": 2350907392 + }, + { + "epoch": 0.84, + "learning_rate": 8.077941732879304e-05, + "loss": 3.0972, + "theoretical_loss": 3.385064825177776, + "tokens_seen": 2351955968 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.4856957793235779, + "objective/train/docs_used": 1325482, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.927109479904175, + "objective/train/original_loss": 2.927109718322754, + "objective/train/theoretical_loss": 3.3849734513903473, + "objective/train/tokens_used": 2373202400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24306103587150574, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498050451278687, + "objective/train/weighted_lm_loss": 3.0720131397247314, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9516735672950745, + "theoretical_loss": 3.3849734513903473, + "tokens_seen": 2352742400 + }, + { + "epoch": 0.84, + "learning_rate": 8.059023836549375e-05, + "loss": 3.0963, + "theoretical_loss": 3.384943002148538, + "tokens_seen": 2353004544 + }, + { + "epoch": 0.84, + "learning_rate": 8.040105940219447e-05, + "loss": 3.0522, + "theoretical_loss": 3.384821248588562, + "tokens_seen": 2354053120 + }, + { + "epoch": 0.84, + "learning_rate": 8.021188043889519e-05, + "loss": 3.0707, + "theoretical_loss": 3.3846995644273132, + "tokens_seen": 2355101696 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.48670029640197754, + "objective/train/docs_used": 1326516, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.063535690307617, + "objective/train/original_loss": 3.063535690307617, + "objective/train/theoretical_loss": 3.3845931476594964, + "objective/train/tokens_used": 2376479200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24172568321228027, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498987436294556, + "objective/train/weighted_lm_loss": 3.215937614440918, + "objective/train/weights_max": 1.0512151718139648, + "objective/train/weights_min": 0.952146589756012, + "theoretical_loss": 3.3845931476594964, + "tokens_seen": 2356019200 + }, + { + "epoch": 0.84, + "learning_rate": 8.002270147559591e-05, + "loss": 3.091, + "theoretical_loss": 3.384577949594364, + "tokens_seen": 2356150272 + }, + { + "epoch": 0.84, + "learning_rate": 7.983352251229663e-05, + "loss": 3.1252, + "theoretical_loss": 3.3844564040193887, + "tokens_seen": 2357198848 + }, + { + "epoch": 0.84, + "learning_rate": 7.964434354899735e-05, + "loss": 3.2091, + "theoretical_loss": 3.384334927632162, + "tokens_seen": 2358247424 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.47246748208999634, + "objective/train/docs_used": 1327582, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9666707515716553, + "objective/train/original_loss": 2.9666709899902344, + "objective/train/theoretical_loss": 3.3842135203625627, + "objective/train/tokens_used": 2379756000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23522746562957764, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048442006111145, + "objective/train/weighted_lm_loss": 3.1088528633117676, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9515091180801392, + "theoretical_loss": 3.3842135203625627, + "tokens_seen": 2359296000 + }, + { + "epoch": 0.84, + "learning_rate": 7.945516458569807e-05, + "loss": 3.1832, + "theoretical_loss": 3.3842135203625627, + "tokens_seen": 2359296000 + }, + { + "epoch": 0.84, + "learning_rate": 7.926598562239878e-05, + "loss": 3.2233, + "theoretical_loss": 3.3840921821405723, + "tokens_seen": 2360344576 + }, + { + "epoch": 0.84, + "learning_rate": 7.907680665909952e-05, + "loss": 3.2095, + "theoretical_loss": 3.3839709128962725, + "tokens_seen": 2361393152 + }, + { + "epoch": 0.84, + "learning_rate": 7.888762769580022e-05, + "loss": 3.1732, + "theoretical_loss": 3.3838497125598486, + "tokens_seen": 2362441728 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.4920428693294525, + "objective/train/docs_used": 1329627, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9259815216064453, + "objective/train/original_loss": 2.9259815216064453, + "objective/train/theoretical_loss": 3.3838345673594072, + "objective/train/tokens_used": 2383032800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24389143288135529, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504441261291504, + "objective/train/weighted_lm_loss": 3.0741143226623535, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9753554463386536, + "theoretical_loss": 3.3838345673594072, + "tokens_seen": 2362572800 + }, + { + "epoch": 0.84, + "learning_rate": 7.869844873250094e-05, + "loss": 3.1582, + "theoretical_loss": 3.383728581061586, + "tokens_seen": 2363490304 + }, + { + "epoch": 0.84, + "learning_rate": 7.850926976920166e-05, + "loss": 3.1248, + "theoretical_loss": 3.383607518331873, + "tokens_seen": 2364538880 + }, + { + "epoch": 0.84, + "learning_rate": 7.832009080590238e-05, + "loss": 3.1062, + "theoretical_loss": 3.3834865243011985, + "tokens_seen": 2365587456 + }, + { + "epoch": 0.84, + "objective/train/advantage_avg": 0.47797757387161255, + "objective/train/docs_used": 1331397, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0442779064178467, + "objective/train/original_loss": 3.044278144836426, + "objective/train/theoretical_loss": 3.383456286519618, + "objective/train/tokens_used": 2386309600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24260950088500977, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490306615829468, + "objective/train/weighted_lm_loss": 3.191368818283081, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9514516592025757, + "theoretical_loss": 3.383456286519618, + "tokens_seen": 2365849600 + }, + { + "epoch": 0.85, + "learning_rate": 7.813091184260309e-05, + "loss": 3.1024, + "theoretical_loss": 3.383365598900151, + "tokens_seen": 2366636032 + }, + { + "epoch": 0.85, + "learning_rate": 7.794173287930383e-05, + "loss": 3.1292, + "theoretical_loss": 3.3832447420594227, + "tokens_seen": 2367684608 + }, + { + "epoch": 0.85, + "learning_rate": 7.775255391600455e-05, + "loss": 3.1855, + "theoretical_loss": 3.383123953709804, + "tokens_seen": 2368733184 + }, + { + "epoch": 0.85, + "objective/train/advantage_avg": 0.49193400144577026, + "objective/train/docs_used": 1333210, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.104257106781006, + "objective/train/original_loss": 3.1042566299438477, + "objective/train/theoretical_loss": 3.383078675722453, + "objective/train/tokens_used": 2389586400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24367199838161469, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504320859909058, + "objective/train/weighted_lm_loss": 3.261072874069214, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9764650464057922, + "theoretical_loss": 3.383078675722453, + "tokens_seen": 2369126400 + }, + { + "epoch": 0.85, + "learning_rate": 7.756337495270525e-05, + "loss": 3.1193, + "theoretical_loss": 3.383003233782187, + "tokens_seen": 2369781760 + }, + { + "epoch": 0.85, + "learning_rate": 7.737419598940599e-05, + "loss": 3.173, + "theoretical_loss": 3.382882582207563, + "tokens_seen": 2370830336 + }, + { + "epoch": 0.85, + "learning_rate": 7.71850170261067e-05, + "loss": 3.1487, + "theoretical_loss": 3.3827619989170254, + "tokens_seen": 2371878912 + }, + { + "epoch": 0.85, + "objective/train/advantage_avg": 0.47686776518821716, + "objective/train/docs_used": 1334412, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9060118198394775, + "objective/train/original_loss": 2.9060120582580566, + "objective/train/theoretical_loss": 3.3827017328567823, + "objective/train/tokens_used": 2392863200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.235874742269516, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048885703086853, + "objective/train/weighted_lm_loss": 3.0491559505462646, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9522488117218018, + "theoretical_loss": 3.3827017328567823, + "tokens_seen": 2372403200 + }, + { + "epoch": 0.85, + "learning_rate": 7.699583806280741e-05, + "loss": 3.1017, + "theoretical_loss": 3.3826414838417653, + "tokens_seen": 2372927488 + }, + { + "epoch": 0.85, + "learning_rate": 7.680665909950814e-05, + "loss": 3.1413, + "theoretical_loss": 3.382521036913075, + "tokens_seen": 2373976064 + }, + { + "epoch": 0.85, + "learning_rate": 7.661748013620886e-05, + "loss": 3.1409, + "theoretical_loss": 3.3824006580623447, + "tokens_seen": 2375024640 + }, + { + "epoch": 0.85, + "objective/train/advantage_avg": 0.4799953103065491, + "objective/train/docs_used": 1336383, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9855668544769287, + "objective/train/original_loss": 2.9855666160583496, + "objective/train/theoretical_loss": 3.3823254558210323, + "objective/train/tokens_used": 2396140000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23651105165481567, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049201488494873, + "objective/train/weighted_lm_loss": 3.132995128631592, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9516184329986572, + "theoretical_loss": 3.3823254558210323, + "tokens_seen": 2375680000 + }, + { + "epoch": 0.85, + "learning_rate": 7.642830117290956e-05, + "loss": 3.0911, + "theoretical_loss": 3.382280347221066, + "tokens_seen": 2376073216 + }, + { + "epoch": 0.85, + "learning_rate": 7.62391222096103e-05, + "loss": 3.1107, + "theoretical_loss": 3.3821601043208283, + "tokens_seen": 2377121792 + }, + { + "epoch": 0.85, + "learning_rate": 7.6049943246311e-05, + "loss": 3.1237, + "theoretical_loss": 3.3820399292933194, + "tokens_seen": 2378170368 + }, + { + "epoch": 0.85, + "objective/train/advantage_avg": 0.47441014647483826, + "objective/train/docs_used": 1338132, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.338620901107788, + "objective/train/original_loss": 3.338620662689209, + "objective/train/theoretical_loss": 3.381949842523129, + "objective/train/tokens_used": 2399416800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2352495640516281, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486366748809814, + "objective/train/weighted_lm_loss": 3.5036096572875977, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9518207907676697, + "theoretical_loss": 3.381949842523129, + "tokens_seen": 2378956800 + }, + { + "epoch": 0.85, + "learning_rate": 7.586076428301172e-05, + "loss": 3.0793, + "theoretical_loss": 3.381919822070328, + "tokens_seen": 2379218944 + }, + { + "epoch": 0.85, + "learning_rate": 7.567158531971246e-05, + "loss": 3.1023, + "theoretical_loss": 3.3817997825837396, + "tokens_seen": 2380267520 + }, + { + "epoch": 0.85, + "learning_rate": 7.548240635641317e-05, + "loss": 3.1106, + "theoretical_loss": 3.3816798107655384, + "tokens_seen": 2381316096 + }, + { + "epoch": 0.85, + "objective/train/advantage_avg": 0.48399755358695984, + "objective/train/docs_used": 1340136, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9534103870391846, + "objective/train/original_loss": 2.9534106254577637, + "objective/train/theoretical_loss": 3.381574890880442, + "objective/train/tokens_used": 2402693600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24139027297496796, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496268272399902, + "objective/train/weighted_lm_loss": 3.0994439125061035, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9525738954544067, + "theoretical_loss": 3.381574890880442, + "tokens_seen": 2382233600 + }, + { + "epoch": 0.85, + "learning_rate": 7.529322739311389e-05, + "loss": 3.1093, + "theoretical_loss": 3.3815599065478072, + "tokens_seen": 2382364672 + }, + { + "epoch": 0.85, + "learning_rate": 7.51040484298146e-05, + "loss": 3.173, + "theoretical_loss": 3.3814400698627263, + "tokens_seen": 2383413248 + }, + { + "epoch": 0.85, + "learning_rate": 7.491486946651533e-05, + "loss": 3.1798, + "theoretical_loss": 3.3813203006425745, + "tokens_seen": 2384461824 + }, + { + "epoch": 0.85, + "objective/train/advantage_avg": 0.4814996123313904, + "objective/train/docs_used": 1341857, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8379721641540527, + "objective/train/original_loss": 2.837972640991211, + "objective/train/theoretical_loss": 3.3812005988197273, + "objective/train/tokens_used": 2405970400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2411598116159439, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493756532669067, + "objective/train/weighted_lm_loss": 2.9767820835113525, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9520983099937439, + "theoretical_loss": 3.3812005988197273, + "tokens_seen": 2385510400 + }, + { + "epoch": 0.85, + "learning_rate": 7.472569050321603e-05, + "loss": 3.1044, + "theoretical_loss": 3.3812005988197273, + "tokens_seen": 2385510400 + }, + { + "epoch": 0.85, + "learning_rate": 7.453651153991677e-05, + "loss": 3.1587, + "theoretical_loss": 3.3810809643266593, + "tokens_seen": 2386558976 + }, + { + "epoch": 0.85, + "learning_rate": 7.434733257661748e-05, + "loss": 3.1508, + "theoretical_loss": 3.3809613970959402, + "tokens_seen": 2387607552 + }, + { + "epoch": 0.85, + "learning_rate": 7.41581536133182e-05, + "loss": 3.1208, + "theoretical_loss": 3.3808418970602387, + "tokens_seen": 2388656128 + }, + { + "epoch": 0.85, + "objective/train/advantage_avg": 0.46042418479919434, + "objective/train/docs_used": 1343487, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1994292736053467, + "objective/train/original_loss": 3.1994290351867676, + "objective/train/theoretical_loss": 3.380826964277076, + "objective/train/tokens_used": 2409247200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23168209195137024, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.04721999168396, + "objective/train/weighted_lm_loss": 3.3554635047912598, + "objective/train/weights_max": 1.0512161254882812, + "objective/train/weights_min": 0.9517953991889954, + "theoretical_loss": 3.380826964277076, + "tokens_seen": 2388787200 + }, + { + "epoch": 0.85, + "learning_rate": 7.396897465001892e-05, + "loss": 3.1114, + "theoretical_loss": 3.3807224641523193, + "tokens_seen": 2389704704 + }, + { + "epoch": 0.85, + "learning_rate": 7.377979568671964e-05, + "loss": 3.1946, + "theoretical_loss": 3.380603098305044, + "tokens_seen": 2390753280 + }, + { + "epoch": 0.85, + "learning_rate": 7.359061672342034e-05, + "loss": 3.1657, + "theoretical_loss": 3.38048379945137, + "tokens_seen": 2391801856 + }, + { + "epoch": 0.85, + "objective/train/advantage_avg": 0.4793241322040558, + "objective/train/docs_used": 1344994, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0667428970336914, + "objective/train/original_loss": 3.066742420196533, + "objective/train/theoretical_loss": 3.380453985197855, + "objective/train/tokens_used": 2412524000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23756831884384155, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491399765014648, + "objective/train/weighted_lm_loss": 3.2188243865966797, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.95333331823349, + "theoretical_loss": 3.380453985197855, + "tokens_seen": 2392064000 + }, + { + "epoch": 0.85, + "learning_rate": 7.340143776012108e-05, + "loss": 3.1791, + "theoretical_loss": 3.3803645675243534, + "tokens_seen": 2392850432 + }, + { + "epoch": 0.86, + "learning_rate": 7.32122587968218e-05, + "loss": 3.1188, + "theoretical_loss": 3.3802454024571436, + "tokens_seen": 2393899008 + }, + { + "epoch": 0.86, + "learning_rate": 7.30230798335225e-05, + "loss": 3.1065, + "theoretical_loss": 3.3801263041829883, + "tokens_seen": 2394947584 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.4877893328666687, + "objective/train/docs_used": 1347049, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3766496181488037, + "objective/train/original_loss": 3.3766493797302246, + "objective/train/theoretical_loss": 3.380081659536656, + "objective/train/tokens_used": 2415800800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24295225739479065, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500140190124512, + "objective/train/weighted_lm_loss": 3.5450875759124756, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.951555073261261, + "theoretical_loss": 3.380081659536656, + "tokens_seen": 2395340800 + }, + { + "epoch": 0.86, + "learning_rate": 7.283390087022324e-05, + "loss": 3.1217, + "theoretical_loss": 3.3800072726352295, + "tokens_seen": 2395996160 + }, + { + "epoch": 0.86, + "learning_rate": 7.264472190692395e-05, + "loss": 3.106, + "theoretical_loss": 3.3798883077473056, + "tokens_seen": 2397044736 + }, + { + "epoch": 0.86, + "learning_rate": 7.245554294362467e-05, + "loss": 3.1241, + "theoretical_loss": 3.3797694094527504, + "tokens_seen": 2398093312 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.49422043561935425, + "objective/train/docs_used": 1348115, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.205146551132202, + "objective/train/original_loss": 3.205146551132202, + "objective/train/theoretical_loss": 3.379709985257241, + "objective/train/tokens_used": 2419077600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24489258229732513, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506669282913208, + "objective/train/weighted_lm_loss": 3.3676390647888184, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9589699506759644, + "theoretical_loss": 3.379709985257241, + "tokens_seen": 2398617600 + }, + { + "epoch": 0.86, + "learning_rate": 7.226636398032539e-05, + "loss": 3.0947, + "theoretical_loss": 3.379650577685193, + "tokens_seen": 2399141888 + }, + { + "epoch": 0.86, + "learning_rate": 7.207718501702611e-05, + "loss": 3.1216, + "theoretical_loss": 3.379531812378357, + "tokens_seen": 2400190464 + }, + { + "epoch": 0.86, + "learning_rate": 7.188800605372682e-05, + "loss": 3.1411, + "theoretical_loss": 3.3794131134660623, + "tokens_seen": 2401239040 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.49548736214637756, + "objective/train/docs_used": 1349979, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4230825901031494, + "objective/train/original_loss": 3.4230828285217285, + "objective/train/theoretical_loss": 3.379338960332488, + "objective/train/tokens_used": 2422354400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24636338651180267, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0508012771606445, + "objective/train/weighted_lm_loss": 3.5968568325042725, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.974648118019104, + "theoretical_loss": 3.379338960332488, + "tokens_seen": 2401894400 + }, + { + "epoch": 0.86, + "learning_rate": 7.169882709042755e-05, + "loss": 3.1393, + "theoretical_loss": 3.3792944808822227, + "tokens_seen": 2402287616 + }, + { + "epoch": 0.86, + "learning_rate": 7.150964812712826e-05, + "loss": 3.1068, + "theoretical_loss": 3.3791759145608458, + "tokens_seen": 2403336192 + }, + { + "epoch": 0.86, + "learning_rate": 7.132046916382898e-05, + "loss": 3.0804, + "theoretical_loss": 3.3790574144360352, + "tokens_seen": 2404384768 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.4680725932121277, + "objective/train/docs_used": 1352287, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8557844161987305, + "objective/train/original_loss": 2.8557848930358887, + "objective/train/theoretical_loss": 3.378968582744336, + "objective/train/tokens_used": 2425631200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23578692972660065, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0480057001113892, + "objective/train/weighted_lm_loss": 2.99615478515625, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9522780179977417, + "theoretical_loss": 3.378968582744336, + "tokens_seen": 2405171200 + }, + { + "epoch": 0.86, + "learning_rate": 7.113129020052971e-05, + "loss": 3.0916, + "theoretical_loss": 3.378938980441988, + "tokens_seen": 2405433344 + }, + { + "epoch": 0.86, + "learning_rate": 7.094211123723042e-05, + "loss": 3.0778, + "theoretical_loss": 3.3788206125129947, + "tokens_seen": 2406481920 + }, + { + "epoch": 0.86, + "learning_rate": 7.075293227393114e-05, + "loss": 3.0851, + "theoretical_loss": 3.3787023105834413, + "tokens_seen": 2407530496 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.47238680720329285, + "objective/train/docs_used": 1354179, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.906632423400879, + "objective/train/original_loss": 2.9066319465637207, + "objective/train/theoretical_loss": 3.378598850483736, + "objective/train/tokens_used": 2428908000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2311716079711914, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048413634300232, + "objective/train/weighted_lm_loss": 3.047044515609741, + "objective/train/weights_max": 1.051215648651123, + "objective/train/weights_min": 0.9545229077339172, + "theoretical_loss": 3.378598850483736, + "tokens_seen": 2408448000 + }, + { + "epoch": 0.86, + "learning_rate": 7.056375331063186e-05, + "loss": 3.0996, + "theoretical_loss": 3.3785840745878057, + "tokens_seen": 2408579072 + }, + { + "epoch": 0.86, + "learning_rate": 7.037457434733258e-05, + "loss": 3.0983, + "theoretical_loss": 3.3784659044606604, + "tokens_seen": 2409627648 + }, + { + "epoch": 0.86, + "learning_rate": 7.018539538403329e-05, + "loss": 3.0816, + "theoretical_loss": 3.378347800136672, + "tokens_seen": 2410676224 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.4815124273300171, + "objective/train/docs_used": 1355984, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7397754192352295, + "objective/train/original_loss": 2.7397756576538086, + "objective/train/theoretical_loss": 3.378229761550598, + "objective/train/tokens_used": 2432184800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23886069655418396, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493652820587158, + "objective/train/weighted_lm_loss": 2.875002861022949, + "objective/train/weights_max": 1.0512160062789917, + "objective/train/weights_min": 0.9514920115470886, + "theoretical_loss": 3.378229761550598, + "tokens_seen": 2411724800 + }, + { + "epoch": 0.86, + "learning_rate": 6.999621642073402e-05, + "loss": 2.9866, + "theoretical_loss": 3.378229761550598, + "tokens_seen": 2411724800 + }, + { + "epoch": 0.86, + "learning_rate": 6.980703745743473e-05, + "loss": 3.071, + "theoretical_loss": 3.3781117886372902, + "tokens_seen": 2412773376 + }, + { + "epoch": 0.86, + "learning_rate": 6.961785849413545e-05, + "loss": 3.0636, + "theoretical_loss": 3.3779938813316943, + "tokens_seen": 2413821952 + }, + { + "epoch": 0.86, + "learning_rate": 6.942867953083617e-05, + "loss": 2.9877, + "theoretical_loss": 3.377876039568847, + "tokens_seen": 2414870528 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.4852672517299652, + "objective/train/docs_used": 1358162, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6078548431396484, + "objective/train/original_loss": 2.6078543663024902, + "objective/train/theoretical_loss": 3.377861313953734, + "objective/train/tokens_used": 2435461600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.240381121635437, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497486591339111, + "objective/train/weighted_lm_loss": 2.7380173206329346, + "objective/train/weights_max": 1.0512151718139648, + "objective/train/weights_min": 0.9583688378334045, + "theoretical_loss": 3.377861313953734, + "tokens_seen": 2415001600 + }, + { + "epoch": 0.86, + "learning_rate": 6.923950056753689e-05, + "loss": 3.0179, + "theoretical_loss": 3.3777582632838783, + "tokens_seen": 2415919104 + }, + { + "epoch": 0.86, + "learning_rate": 6.905032160423761e-05, + "loss": 3.0409, + "theoretical_loss": 3.3776405524120108, + "tokens_seen": 2416967680 + }, + { + "epoch": 0.86, + "learning_rate": 6.886114264093833e-05, + "loss": 3.0834, + "theoretical_loss": 3.3775229068885584, + "tokens_seen": 2418016256 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.49422746896743774, + "objective/train/docs_used": 1359820, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.702634811401367, + "objective/train/original_loss": 2.702634811401367, + "objective/train/theoretical_loss": 3.3774935057108135, + "objective/train/tokens_used": 2438738400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24477465450763702, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506671667099, + "objective/train/weighted_lm_loss": 2.839879274368286, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9933627843856812, + "theoretical_loss": 3.3774935057108135, + "tokens_seen": 2418278400 + }, + { + "epoch": 0.86, + "learning_rate": 6.867196367763905e-05, + "loss": 3.0768, + "theoretical_loss": 3.377405326648927, + "tokens_seen": 2419064832 + }, + { + "epoch": 0.86, + "learning_rate": 6.848278471433976e-05, + "loss": 3.0517, + "theoretical_loss": 3.377287811628616, + "tokens_seen": 2420113408 + }, + { + "epoch": 0.86, + "learning_rate": 6.829360575104049e-05, + "loss": 3.0728, + "theoretical_loss": 3.3771703617632136, + "tokens_seen": 2421161984 + }, + { + "epoch": 0.86, + "objective/train/advantage_avg": 0.49273377656936646, + "objective/train/docs_used": 1361438, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8423805236816406, + "objective/train/original_loss": 2.8423805236816406, + "objective/train/theoretical_loss": 3.377126334848307, + "objective/train/tokens_used": 2442015200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24471589922904968, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505174398422241, + "objective/train/weighted_lm_loss": 2.9853973388671875, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9665617346763611, + "theoretical_loss": 3.377126334848307, + "tokens_seen": 2421555200 + }, + { + "epoch": 0.87, + "learning_rate": 6.81044267877412e-05, + "loss": 3.0376, + "theoretical_loss": 3.3770529769884017, + "tokens_seen": 2422210560 + }, + { + "epoch": 0.87, + "learning_rate": 6.791524782444192e-05, + "loss": 3.1151, + "theoretical_loss": 3.376935657239953, + "tokens_seen": 2423259136 + }, + { + "epoch": 0.87, + "learning_rate": 6.772606886114264e-05, + "loss": 3.06, + "theoretical_loss": 3.3768184024537313, + "tokens_seen": 2424307712 + }, + { + "epoch": 0.87, + "objective/train/advantage_avg": 0.4682343304157257, + "objective/train/docs_used": 1363257, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8115761280059814, + "objective/train/original_loss": 2.8115761280059814, + "objective/train/theoretical_loss": 3.3767597994014373, + "objective/train/tokens_used": 2445292000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23106399178504944, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0479977130889893, + "objective/train/weighted_lm_loss": 2.944631814956665, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9522070288658142, + "theoretical_loss": 3.3767597994014373, + "tokens_seen": 2424832000 + }, + { + "epoch": 0.87, + "learning_rate": 6.753688989784336e-05, + "loss": 3.1218, + "theoretical_loss": 3.376701212565691, + "tokens_seen": 2425356288 + }, + { + "epoch": 0.87, + "learning_rate": 6.734771093454407e-05, + "loss": 3.0973, + "theoretical_loss": 3.376584087511877, + "tokens_seen": 2426404864 + }, + { + "epoch": 0.87, + "learning_rate": 6.71585319712448e-05, + "loss": 3.0884, + "theoretical_loss": 3.3764670272284265, + "tokens_seen": 2427453440 + }, + { + "epoch": 0.87, + "objective/train/advantage_avg": 0.4915653467178345, + "objective/train/docs_used": 1365224, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8232626914978027, + "objective/train/original_loss": 2.8232626914978027, + "objective/train/theoretical_loss": 3.376393897414129, + "objective/train/tokens_used": 2448568800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24267259240150452, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503901243209839, + "objective/train/weighted_lm_loss": 2.9657554626464844, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9560016989707947, + "theoretical_loss": 3.376393897414129, + "tokens_seen": 2428108800 + }, + { + "epoch": 0.87, + "learning_rate": 6.696935300794552e-05, + "loss": 3.0388, + "theoretical_loss": 3.376350031651565, + "tokens_seen": 2428502016 + }, + { + "epoch": 0.87, + "learning_rate": 6.678017404464623e-05, + "loss": 3.0579, + "theoretical_loss": 3.37623310071761, + "tokens_seen": 2429550592 + }, + { + "epoch": 0.87, + "learning_rate": 6.659099508134696e-05, + "loss": 3.0543, + "theoretical_loss": 3.376116234362968, + "tokens_seen": 2430599168 + }, + { + "epoch": 0.87, + "objective/train/advantage_avg": 0.4831455647945404, + "objective/train/docs_used": 1366375, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2068450450897217, + "objective/train/original_loss": 3.2068448066711426, + "objective/train/theoretical_loss": 3.376028626938956, + "objective/train/tokens_used": 2451845600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2415134757757187, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495421886444092, + "objective/train/weighted_lm_loss": 3.365105152130127, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9523228406906128, + "theoretical_loss": 3.376028626938956, + "tokens_seen": 2431385600 + }, + { + "epoch": 0.87, + "learning_rate": 6.640181611804767e-05, + "loss": 3.0921, + "theoretical_loss": 3.375999432524136, + "tokens_seen": 2431647744 + }, + { + "epoch": 0.87, + "learning_rate": 6.621263715474839e-05, + "loss": 3.0504, + "theoretical_loss": 3.3758826951377006, + "tokens_seen": 2432696320 + }, + { + "epoch": 0.87, + "learning_rate": 6.602345819144911e-05, + "loss": 3.0778, + "theoretical_loss": 3.375766022140338, + "tokens_seen": 2433744896 + }, + { + "epoch": 0.87, + "objective/train/advantage_avg": 0.48540857434272766, + "objective/train/docs_used": 1367967, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.798003673553467, + "objective/train/original_loss": 2.798003673553467, + "objective/train/theoretical_loss": 3.375663986037095, + "objective/train/tokens_used": 2455122400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24191917479038239, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497705936431885, + "objective/train/weighted_lm_loss": 2.936539649963379, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.951850414276123, + "theoretical_loss": 3.375663986037095, + "tokens_seen": 2434662400 + }, + { + "epoch": 0.87, + "learning_rate": 6.583427922814983e-05, + "loss": 3.1003, + "theoretical_loss": 3.3756494134688144, + "tokens_seen": 2434793472 + }, + { + "epoch": 0.87, + "learning_rate": 6.564510026485054e-05, + "loss": 3.0541, + "theoretical_loss": 3.3755328690599846, + "tokens_seen": 2435842048 + }, + { + "epoch": 0.87, + "learning_rate": 6.545592130155127e-05, + "loss": 3.0621, + "theoretical_loss": 3.3754163888507933, + "tokens_seen": 2436890624 + }, + { + "epoch": 0.87, + "objective/train/advantage_avg": 0.47676169872283936, + "objective/train/docs_used": 1369908, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7874505519866943, + "objective/train/original_loss": 2.7874507904052734, + "objective/train/theoretical_loss": 3.375299972778273, + "objective/train/tokens_used": 2458399200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23362308740615845, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048863410949707, + "objective/train/weighted_lm_loss": 2.9224209785461426, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.951828122138977, + "theoretical_loss": 3.375299972778273, + "tokens_seen": 2437939200 + }, + { + "epoch": 0.87, + "learning_rate": 6.526674233825198e-05, + "loss": 2.9885, + "theoretical_loss": 3.375299972778273, + "tokens_seen": 2437939200 + }, + { + "epoch": 0.87, + "learning_rate": 6.50775633749527e-05, + "loss": 2.9628, + "theoretical_loss": 3.3751836207795463, + "tokens_seen": 2438987776 + }, + { + "epoch": 0.87, + "learning_rate": 6.488838441165343e-05, + "loss": 3.0596, + "theoretical_loss": 3.375067332791823, + "tokens_seen": 2440036352 + }, + { + "epoch": 0.87, + "learning_rate": 6.469920544835414e-05, + "loss": 3.0705, + "theoretical_loss": 3.3749511087524033, + "tokens_seen": 2441084928 + }, + { + "epoch": 0.87, + "objective/train/advantage_avg": 0.4786227345466614, + "objective/train/docs_used": 1371503, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.74287486076355, + "objective/train/original_loss": 2.7428746223449707, + "objective/train/theoretical_loss": 3.3749365852407216, + "objective/train/tokens_used": 2461676000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23829062283039093, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049073338508606, + "objective/train/weighted_lm_loss": 2.8782496452331543, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9517157673835754, + "theoretical_loss": 3.3749365852407216, + "tokens_seen": 2441216000 + }, + { + "epoch": 0.87, + "learning_rate": 6.451002648505486e-05, + "loss": 3.0573, + "theoretical_loss": 3.3748349485986737, + "tokens_seen": 2442133504 + }, + { + "epoch": 0.87, + "learning_rate": 6.432084752175558e-05, + "loss": 3.1075, + "theoretical_loss": 3.37471885226811, + "tokens_seen": 2443182080 + }, + { + "epoch": 0.87, + "learning_rate": 6.41316685584563e-05, + "loss": 3.033, + "theoretical_loss": 3.3746028196982762, + "tokens_seen": 2444230656 + }, + { + "epoch": 0.87, + "objective/train/advantage_avg": 0.4795815050601959, + "objective/train/docs_used": 1373215, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7482497692108154, + "objective/train/original_loss": 2.7482497692108154, + "objective/train/theoretical_loss": 3.3745738215111234, + "objective/train/tokens_used": 2464952800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23792575299739838, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049167513847351, + "objective/train/weighted_lm_loss": 2.8822453022003174, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9520523548126221, + "theoretical_loss": 3.3745738215111234, + "tokens_seen": 2444492800 + }, + { + "epoch": 0.87, + "learning_rate": 6.394248959515701e-05, + "loss": 3.0633, + "theoretical_loss": 3.3744868508268233, + "tokens_seen": 2445279232 + }, + { + "epoch": 0.87, + "learning_rate": 6.375331063185774e-05, + "loss": 3.0684, + "theoretical_loss": 3.3743709455914903, + "tokens_seen": 2446327808 + }, + { + "epoch": 0.87, + "learning_rate": 6.356413166855845e-05, + "loss": 3.0596, + "theoretical_loss": 3.3742551039301043, + "tokens_seen": 2447376384 + }, + { + "epoch": 0.87, + "objective/train/advantage_avg": 0.4921453595161438, + "objective/train/docs_used": 1375012, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.813209295272827, + "objective/train/original_loss": 2.8132095336914062, + "objective/train/theoretical_loss": 3.374211679684568, + "objective/train/tokens_used": 2468229600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2444104254245758, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504570007324219, + "objective/train/weighted_lm_loss": 2.9547219276428223, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9768006205558777, + "theoretical_loss": 3.374211679684568, + "tokens_seen": 2447769600 + }, + { + "epoch": 0.87, + "learning_rate": 6.337495270525917e-05, + "loss": 2.9949, + "theoretical_loss": 3.374139325780579, + "tokens_seen": 2448424960 + }, + { + "epoch": 0.87, + "learning_rate": 6.318577374195989e-05, + "loss": 3.0415, + "theoretical_loss": 3.374023611080915, + "tokens_seen": 2449473536 + }, + { + "epoch": 0.88, + "learning_rate": 6.299659477866061e-05, + "loss": 3.1023, + "theoretical_loss": 3.3739079597692014, + "tokens_seen": 2450522112 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.48276713490486145, + "objective/train/docs_used": 1376896, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6670351028442383, + "objective/train/original_loss": 2.6670355796813965, + "objective/train/theoretical_loss": 3.373850157864502, + "objective/train/tokens_used": 2471506400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23791970312595367, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494859218597412, + "objective/train/weighted_lm_loss": 2.799875259399414, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9536236524581909, + "theoretical_loss": 3.373850157864502, + "tokens_seen": 2451046400 + }, + { + "epoch": 0.88, + "learning_rate": 6.280741581536132e-05, + "loss": 3.1117, + "theoretical_loss": 3.3737923717836127, + "tokens_seen": 2451570688 + }, + { + "epoch": 0.88, + "learning_rate": 6.261823685206205e-05, + "loss": 3.1263, + "theoretical_loss": 3.3736768470624106, + "tokens_seen": 2452619264 + }, + { + "epoch": 0.88, + "learning_rate": 6.242905788876277e-05, + "loss": 2.9923, + "theoretical_loss": 3.373561385543943, + "tokens_seen": 2453667840 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.4910533130168915, + "objective/train/docs_used": 1379050, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7924742698669434, + "objective/train/original_loss": 2.7924742698669434, + "objective/train/theoretical_loss": 3.373489254162681, + "objective/train/tokens_used": 2474783200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24239566922187805, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503374338150024, + "objective/train/weighted_lm_loss": 2.933199405670166, + "objective/train/weights_max": 1.0512187480926514, + "objective/train/weights_min": 0.9563108682632446, + "theoretical_loss": 3.373489254162681, + "tokens_seen": 2454323200 + }, + { + "epoch": 0.88, + "learning_rate": 6.22398789254635e-05, + "loss": 3.0166, + "theoretical_loss": 3.3734459871666456, + "tokens_seen": 2454716416 + }, + { + "epoch": 0.88, + "learning_rate": 6.205069996216422e-05, + "loss": 3.1157, + "theoretical_loss": 3.373330651869039, + "tokens_seen": 2455764992 + }, + { + "epoch": 0.88, + "learning_rate": 6.186152099886492e-05, + "loss": 3.1398, + "theoretical_loss": 3.373215379589729, + "tokens_seen": 2456813568 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.49097520112991333, + "objective/train/docs_used": 1380428, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9203779697418213, + "objective/train/original_loss": 2.9203779697418213, + "objective/train/theoretical_loss": 3.3731289666991215, + "objective/train/tokens_used": 2478060000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2459252029657364, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503475666046143, + "objective/train/weighted_lm_loss": 3.0663514137268066, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9521451592445374, + "theoretical_loss": 3.3731289666991215, + "tokens_seen": 2457600000 + }, + { + "epoch": 0.88, + "learning_rate": 6.167234203556564e-05, + "loss": 3.0337, + "theoretical_loss": 3.3731001702674104, + "tokens_seen": 2457862144 + }, + { + "epoch": 0.88, + "learning_rate": 6.148316307226636e-05, + "loss": 3.0836, + "theoretical_loss": 3.3729850238408607, + "tokens_seen": 2458910720 + }, + { + "epoch": 0.88, + "learning_rate": 6.129398410896708e-05, + "loss": 3.0798, + "theoretical_loss": 3.372869940248944, + "tokens_seen": 2459959296 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.4859597980976105, + "objective/train/docs_used": 1382125, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.264944314956665, + "objective/train/original_loss": 3.264944076538086, + "objective/train/theoretical_loss": 3.3727692936020572, + "objective/train/tokens_used": 2481336800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24115294218063354, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498218536376953, + "objective/train/weighted_lm_loss": 3.4269485473632812, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9694421291351318, + "theoretical_loss": 3.3727692936020572, + "tokens_seen": 2460876800 + }, + { + "epoch": 0.88, + "learning_rate": 6.11048051456678e-05, + "loss": 3.0595, + "theoretical_loss": 3.3727549194306112, + "tokens_seen": 2461007872 + }, + { + "epoch": 0.88, + "learning_rate": 6.0915626182368526e-05, + "loss": 3.1255, + "theoretical_loss": 3.372639961324896, + "tokens_seen": 2462056448 + }, + { + "epoch": 0.88, + "learning_rate": 6.072644721906924e-05, + "loss": 3.0553, + "theoretical_loss": 3.37252506587092, + "tokens_seen": 2463105024 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.4852433204650879, + "objective/train/docs_used": 1384145, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.900221586227417, + "objective/train/original_loss": 2.900221347808838, + "objective/train/theoretical_loss": 3.372410233007887, + "objective/train/tokens_used": 2484613600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23983712494373322, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497432947158813, + "objective/train/weighted_lm_loss": 3.0446298122406006, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9515975117683411, + "theoretical_loss": 3.372410233007887, + "tokens_seen": 2464153600 + }, + { + "epoch": 0.88, + "learning_rate": 6.053726825576996e-05, + "loss": 3.05, + "theoretical_loss": 3.372410233007887, + "tokens_seen": 2464153600 + }, + { + "epoch": 0.88, + "learning_rate": 6.034808929247068e-05, + "loss": 3.1139, + "theoretical_loss": 3.372295462675088, + "tokens_seen": 2465202176 + }, + { + "epoch": 0.88, + "learning_rate": 6.0158910329171394e-05, + "loss": 3.0191, + "theoretical_loss": 3.372180754811897, + "tokens_seen": 2466250752 + }, + { + "epoch": 0.88, + "learning_rate": 5.9969731365872115e-05, + "loss": 3.0621, + "theoretical_loss": 3.3720661093577737, + "tokens_seen": 2467299328 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.4838484823703766, + "objective/train/docs_used": 1386568, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.136174201965332, + "objective/train/original_loss": 3.136174440383911, + "objective/train/theoretical_loss": 3.372051783061134, + "objective/train/tokens_used": 2487890400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23954464495182037, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496023893356323, + "objective/train/weighted_lm_loss": 3.292409896850586, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9591426253318787, + "theoretical_loss": 3.372051783061134, + "tokens_seen": 2467430400 + }, + { + "epoch": 0.88, + "learning_rate": 5.9780552402572835e-05, + "loss": 3.142, + "theoretical_loss": 3.3719515262522615, + "tokens_seen": 2468347904 + }, + { + "epoch": 0.88, + "learning_rate": 5.959137343927355e-05, + "loss": 3.0806, + "theoretical_loss": 3.3718370054349878, + "tokens_seen": 2469396480 + }, + { + "epoch": 0.88, + "learning_rate": 5.9402194475974277e-05, + "loss": 3.0506, + "theoretical_loss": 3.371722546845665, + "tokens_seen": 2470445056 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.47412246465682983, + "objective/train/docs_used": 1387925, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0952136516571045, + "objective/train/original_loss": 3.0952138900756836, + "objective/train/theoretical_loss": 3.3716939419143945, + "objective/train/tokens_used": 2491167200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2385615110397339, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0486245155334473, + "objective/train/weighted_lm_loss": 3.2452621459960938, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9514861106872559, + "theoretical_loss": 3.3716939419143945, + "tokens_seen": 2470707200 + }, + { + "epoch": 0.88, + "learning_rate": 5.9213015512675e-05, + "loss": 3.0562, + "theoretical_loss": 3.3716081504240885, + "tokens_seen": 2471493632 + }, + { + "epoch": 0.88, + "learning_rate": 5.902383654937571e-05, + "loss": 3.063, + "theoretical_loss": 3.3714938161101378, + "tokens_seen": 2472542208 + }, + { + "epoch": 0.88, + "learning_rate": 5.883465758607643e-05, + "loss": 3.1033, + "theoretical_loss": 3.3713795438437764, + "tokens_seen": 2473590784 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.49264079332351685, + "objective/train/docs_used": 1390126, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.122264862060547, + "objective/train/original_loss": 3.122264862060547, + "objective/train/theoretical_loss": 3.371336707728296, + "objective/train/tokens_used": 2494444000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24525536596775055, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505108833312988, + "objective/train/weighted_lm_loss": 3.279707193374634, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9543718099594116, + "theoretical_loss": 3.371336707728296, + "tokens_seen": 2473984000 + }, + { + "epoch": 0.88, + "learning_rate": 5.864547862277715e-05, + "loss": 3.0441, + "theoretical_loss": 3.3712653335650504, + "tokens_seen": 2474639360 + }, + { + "epoch": 0.88, + "learning_rate": 5.8456299659477866e-05, + "loss": 3.0489, + "theoretical_loss": 3.3711511852140905, + "tokens_seen": 2475687936 + }, + { + "epoch": 0.88, + "learning_rate": 5.8267120696178586e-05, + "loss": 3.0745, + "theoretical_loss": 3.3710370987311085, + "tokens_seen": 2476736512 + }, + { + "epoch": 0.88, + "objective/train/advantage_avg": 0.4860904812812805, + "objective/train/docs_used": 1392376, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2137277126312256, + "objective/train/original_loss": 3.2137279510498047, + "objective/train/theoretical_loss": 3.3709800786714488, + "objective/train/tokens_used": 2497720800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24208112061023712, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498394966125488, + "objective/train/weighted_lm_loss": 3.373750925064087, + "objective/train/weights_max": 1.0512208938598633, + "objective/train/weights_min": 0.9516987800598145, + "theoretical_loss": 3.3709800786714488, + "tokens_seen": 2477260800 + }, + { + "epoch": 0.89, + "learning_rate": 5.807794173287931e-05, + "loss": 3.0616, + "theoretical_loss": 3.3709230740564013, + "tokens_seen": 2477785088 + }, + { + "epoch": 0.89, + "learning_rate": 5.788876276958002e-05, + "loss": 3.0668, + "theoretical_loss": 3.3708091111303475, + "tokens_seen": 2478833664 + }, + { + "epoch": 0.89, + "learning_rate": 5.769958380628074e-05, + "loss": 3.0424, + "theoretical_loss": 3.370695209893409, + "tokens_seen": 2479882240 + }, + { + "epoch": 0.89, + "objective/train/advantage_avg": 0.486337274312973, + "objective/train/docs_used": 1394461, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0017459392547607, + "objective/train/original_loss": 3.0017457008361816, + "objective/train/theoretical_loss": 3.370624052920404, + "objective/train/tokens_used": 2500997600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24043717980384827, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498559474945068, + "objective/train/weighted_lm_loss": 3.151277542114258, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9519734382629395, + "theoretical_loss": 3.370624052920404, + "tokens_seen": 2480537600 + }, + { + "epoch": 0.89, + "learning_rate": 5.751040484298146e-05, + "loss": 2.9919, + "theoretical_loss": 3.3705813702861294, + "tokens_seen": 2480930816 + }, + { + "epoch": 0.89, + "learning_rate": 5.732122587968218e-05, + "loss": 2.9639, + "theoretical_loss": 3.370467592249135, + "tokens_seen": 2481979392 + }, + { + "epoch": 0.89, + "learning_rate": 5.71320469163829e-05, + "loss": 3.0105, + "theoretical_loss": 3.3703538757231355, + "tokens_seen": 2483027968 + }, + { + "epoch": 0.89, + "objective/train/advantage_avg": 0.4779159724712372, + "objective/train/docs_used": 1396627, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1431424617767334, + "objective/train/original_loss": 3.1431422233581543, + "objective/train/theoretical_loss": 3.370268628659605, + "objective/train/tokens_used": 2504274400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23592840135097504, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489908456802368, + "objective/train/weighted_lm_loss": 3.2960641384124756, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9546563625335693, + "theoretical_loss": 3.370268628659605, + "tokens_seen": 2483814400 + }, + { + "epoch": 0.89, + "learning_rate": 5.694286795308362e-05, + "loss": 3.0689, + "theoretical_loss": 3.3702402206489213, + "tokens_seen": 2484076544 + }, + { + "epoch": 0.89, + "learning_rate": 5.675368898978434e-05, + "loss": 2.9845, + "theoretical_loss": 3.3701266269673655, + "tokens_seen": 2485125120 + }, + { + "epoch": 0.89, + "learning_rate": 5.656451002648506e-05, + "loss": 3.0257, + "theoretical_loss": 3.3700130946194222, + "tokens_seen": 2486173696 + }, + { + "epoch": 0.89, + "objective/train/advantage_avg": 0.4774656891822815, + "objective/train/docs_used": 1398483, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0128564834594727, + "objective/train/original_loss": 3.0128560066223145, + "objective/train/theoretical_loss": 3.369913804081346, + "objective/train/tokens_used": 2507551200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23861047625541687, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489592552185059, + "objective/train/weighted_lm_loss": 3.159787654876709, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9514344334602356, + "theoretical_loss": 3.369913804081346, + "tokens_seen": 2487091200 + }, + { + "epoch": 0.89, + "learning_rate": 5.637533106318578e-05, + "loss": 2.9467, + "theoretical_loss": 3.3698996235461283, + "tokens_seen": 2487222272 + }, + { + "epoch": 0.89, + "learning_rate": 5.618615209988649e-05, + "loss": 3.0314, + "theoretical_loss": 3.369786213688601, + "tokens_seen": 2488270848 + }, + { + "epoch": 0.89, + "learning_rate": 5.599697313658721e-05, + "loss": 3.0424, + "theoretical_loss": 3.3696728649880403, + "tokens_seen": 2489319424 + }, + { + "epoch": 0.89, + "objective/train/advantage_avg": 0.4827979803085327, + "objective/train/docs_used": 1400481, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7925925254821777, + "objective/train/original_loss": 2.7925925254821777, + "objective/train/theoretical_loss": 3.369559577385726, + "objective/train/tokens_used": 2510828000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23709921538829803, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494848489761353, + "objective/train/weighted_lm_loss": 2.9326014518737793, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9522310495376587, + "theoretical_loss": 3.369559577385726, + "tokens_seen": 2490368000 + }, + { + "epoch": 0.89, + "learning_rate": 5.580779417328793e-05, + "loss": 2.9912, + "theoretical_loss": 3.369559577385726, + "tokens_seen": 2490368000 + }, + { + "epoch": 0.89, + "learning_rate": 5.561861520998865e-05, + "loss": 2.9337, + "theoretical_loss": 3.36944635082302, + "tokens_seen": 2491416576 + }, + { + "epoch": 0.89, + "learning_rate": 5.542943624668937e-05, + "loss": 2.9607, + "theoretical_loss": 3.3693331852413637, + "tokens_seen": 2492465152 + }, + { + "epoch": 0.89, + "learning_rate": 5.524025728339009e-05, + "loss": 3.0256, + "theoretical_loss": 3.3692200805822816, + "tokens_seen": 2493513728 + }, + { + "epoch": 0.89, + "objective/train/advantage_avg": 0.47895485162734985, + "objective/train/docs_used": 1401835, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9839255809783936, + "objective/train/original_loss": 2.9839253425598145, + "objective/train/theoretical_loss": 3.369205946780606, + "objective/train/tokens_used": 2514104800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23692239820957184, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490996837615967, + "objective/train/weighted_lm_loss": 3.1297733783721924, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9525783061981201, + "theoretical_loss": 3.369205946780606, + "tokens_seen": 2493644800 + }, + { + "epoch": 0.89, + "learning_rate": 5.505107832009081e-05, + "loss": 3.0499, + "theoretical_loss": 3.369107036787377, + "tokens_seen": 2494562304 + }, + { + "epoch": 0.89, + "learning_rate": 5.486189935679153e-05, + "loss": 2.9932, + "theoretical_loss": 3.3689940537983345, + "tokens_seen": 2495610880 + }, + { + "epoch": 0.89, + "learning_rate": 5.467272039349225e-05, + "loss": 3.0425, + "theoretical_loss": 3.368881131556918, + "tokens_seen": 2496659456 + }, + { + "epoch": 0.89, + "objective/train/advantage_avg": 0.48761147260665894, + "objective/train/docs_used": 1403825, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8729476928710938, + "objective/train/original_loss": 2.8729474544525146, + "objective/train/theoretical_loss": 3.3688529104815634, + "objective/train/tokens_used": 2517381600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23985113203525543, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499802827835083, + "objective/train/weighted_lm_loss": 3.0175294876098633, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9651005268096924, + "theoretical_loss": 3.3688529104815634, + "tokens_seen": 2496921600 + }, + { + "epoch": 0.89, + "learning_rate": 5.448354143019296e-05, + "loss": 3.0147, + "theoretical_loss": 3.368768270004973, + "tokens_seen": 2497708032 + }, + { + "epoch": 0.89, + "learning_rate": 5.4294362466893684e-05, + "loss": 2.9441, + "theoretical_loss": 3.368655469084424, + "tokens_seen": 2498756608 + }, + { + "epoch": 0.89, + "learning_rate": 5.4105183503594404e-05, + "loss": 2.9964, + "theoretical_loss": 3.3685427287372764, + "tokens_seen": 2499805184 + }, + { + "epoch": 0.89, + "objective/train/advantage_avg": 0.4772385358810425, + "objective/train/docs_used": 1405631, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6067678928375244, + "objective/train/original_loss": 2.6067678928375244, + "objective/train/theoretical_loss": 3.3685004667118528, + "objective/train/tokens_used": 2520658400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2354532927274704, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489205121994019, + "objective/train/weighted_lm_loss": 2.7343697547912598, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9528263807296753, + "theoretical_loss": 3.3685004667118528, + "tokens_seen": 2500198400 + }, + { + "epoch": 0.89, + "learning_rate": 5.391600454029512e-05, + "loss": 3.0273, + "theoretical_loss": 3.3684300489056143, + "tokens_seen": 2500853760 + }, + { + "epoch": 0.89, + "learning_rate": 5.372682557699584e-05, + "loss": 3.0264, + "theoretical_loss": 3.3683174295316025, + "tokens_seen": 2501902336 + }, + { + "epoch": 0.89, + "learning_rate": 5.353764661369656e-05, + "loss": 2.9834, + "theoretical_loss": 3.368204870557484, + "tokens_seen": 2502950912 + }, + { + "epoch": 0.89, + "objective/train/advantage_avg": 0.4770272374153137, + "objective/train/docs_used": 1407612, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.106952667236328, + "objective/train/original_loss": 3.106952667236328, + "objective/train/theoretical_loss": 3.3681486137023575, + "objective/train/tokens_used": 2523935200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2343224287033081, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488938093185425, + "objective/train/weighted_lm_loss": 3.259089946746826, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9528036713600159, + "theoretical_loss": 3.3681486137023575, + "tokens_seen": 2503475200 + }, + { + "epoch": 0.89, + "learning_rate": 5.334846765039727e-05, + "loss": 3.0631, + "theoretical_loss": 3.368092371925582, + "tokens_seen": 2503999488 + }, + { + "epoch": 0.89, + "learning_rate": 5.3159288687097994e-05, + "loss": 3.0072, + "theoretical_loss": 3.3679799335782996, + "tokens_seen": 2505048064 + }, + { + "epoch": 0.9, + "learning_rate": 5.297010972379872e-05, + "loss": 2.9526, + "theoretical_loss": 3.3678675554581172, + "tokens_seen": 2506096640 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.48289960622787476, + "objective/train/docs_used": 1409433, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1777186393737793, + "objective/train/original_loss": 3.1777186393737793, + "objective/train/theoretical_loss": 3.3677973496915516, + "objective/train/tokens_used": 2527212000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23917822539806366, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495057106018066, + "objective/train/weighted_lm_loss": 3.33475399017334, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.952046275138855, + "theoretical_loss": 3.3677973496915516, + "tokens_seen": 2506752000 + }, + { + "epoch": 0.9, + "learning_rate": 5.2780930760499435e-05, + "loss": 3.0682, + "theoretical_loss": 3.367755237507595, + "tokens_seen": 2507145216 + }, + { + "epoch": 0.9, + "learning_rate": 5.2591751797200155e-05, + "loss": 3.0621, + "theoretical_loss": 3.367642979669373, + "tokens_seen": 2508193792 + }, + { + "epoch": 0.9, + "learning_rate": 5.2402572833900876e-05, + "loss": 3.0179, + "theoretical_loss": 3.3675307818861677, + "tokens_seen": 2509242368 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.4875558018684387, + "objective/train/docs_used": 1411323, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0832877159118652, + "objective/train/original_loss": 3.083287239074707, + "objective/train/theoretical_loss": 3.367446672925454, + "objective/train/tokens_used": 2530488800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24067936837673187, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499789714813232, + "objective/train/weighted_lm_loss": 3.2369744777679443, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9582372307777405, + "theoretical_loss": 3.367446672925454, + "tokens_seen": 2510028800 + }, + { + "epoch": 0.9, + "learning_rate": 5.221339387060159e-05, + "loss": 3.0508, + "theoretical_loss": 3.367418644100776, + "tokens_seen": 2510290944 + }, + { + "epoch": 0.9, + "learning_rate": 5.202421490730231e-05, + "loss": 3.0319, + "theoretical_loss": 3.367306566256072, + "tokens_seen": 2511339520 + }, + { + "epoch": 0.9, + "learning_rate": 5.183503594400303e-05, + "loss": 3.0629, + "theoretical_loss": 3.3671945482950085, + "tokens_seen": 2512388096 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.4792327284812927, + "objective/train/docs_used": 1413432, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.026296854019165, + "objective/train/original_loss": 3.0262961387634277, + "objective/train/theoretical_loss": 3.3670965816575897, + "objective/train/tokens_used": 2533765600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23847414553165436, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491353273391724, + "objective/train/weighted_lm_loss": 3.174064874649048, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9517701268196106, + "theoretical_loss": 3.3670965816575897, + "tokens_seen": 2513305600 + }, + { + "epoch": 0.9, + "learning_rate": 5.1645856980703744e-05, + "loss": 3.1064, + "theoretical_loss": 3.3670825901606167, + "tokens_seen": 2513436672 + }, + { + "epoch": 0.9, + "learning_rate": 5.1456678017404465e-05, + "loss": 3.0377, + "theoretical_loss": 3.3669706917960047, + "tokens_seen": 2514485248 + }, + { + "epoch": 0.9, + "learning_rate": 5.1267499054105186e-05, + "loss": 3.0616, + "theoretical_loss": 3.3668588531443593, + "tokens_seen": 2515533824 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.48915207386016846, + "objective/train/docs_used": 1415580, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.92588210105896, + "objective/train/original_loss": 2.92588210105896, + "objective/train/theoretical_loss": 3.3667470741489445, + "objective/train/tokens_used": 2537042400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24214540421962738, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501459836959839, + "objective/train/weighted_lm_loss": 3.072300434112549, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9553421139717102, + "theoretical_loss": 3.3667470741489445, + "tokens_seen": 2516582400 + }, + { + "epoch": 0.9, + "learning_rate": 5.10783200908059e-05, + "loss": 3.0839, + "theoretical_loss": 3.3667470741489445, + "tokens_seen": 2516582400 + }, + { + "epoch": 0.9, + "learning_rate": 5.088914112750662e-05, + "loss": 3.0554, + "theoretical_loss": 3.366635354753102, + "tokens_seen": 2517630976 + }, + { + "epoch": 0.9, + "learning_rate": 5.069996216420735e-05, + "loss": 2.977, + "theoretical_loss": 3.3665236949002515, + "tokens_seen": 2518679552 + }, + { + "epoch": 0.9, + "learning_rate": 5.051078320090806e-05, + "loss": 3.059, + "theoretical_loss": 3.3664120945338882, + "tokens_seen": 2519728128 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.4885351061820984, + "objective/train/docs_used": 1417591, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9278507232666016, + "objective/train/original_loss": 2.9278504848480225, + "objective/train/theoretical_loss": 3.3663981486679257, + "objective/train/tokens_used": 2540319200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24352295696735382, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.05009126663208, + "objective/train/weighted_lm_loss": 3.0739758014678955, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9517725110054016, + "theoretical_loss": 3.3663981486679257, + "tokens_seen": 2519859200 + }, + { + "epoch": 0.9, + "learning_rate": 5.032160423760878e-05, + "loss": 2.9885, + "theoretical_loss": 3.3663005535975867, + "tokens_seen": 2520776704 + }, + { + "epoch": 0.9, + "learning_rate": 5.01324252743095e-05, + "loss": 3.0065, + "theoretical_loss": 3.3661890720349965, + "tokens_seen": 2521825280 + }, + { + "epoch": 0.9, + "learning_rate": 4.9943246311010216e-05, + "loss": 3.0255, + "theoretical_loss": 3.366077649789845, + "tokens_seen": 2522873856 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.48337897658348083, + "objective/train/docs_used": 1418957, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5253660678863525, + "objective/train/original_loss": 2.5253658294677734, + "objective/train/theoretical_loss": 3.36604980349032, + "objective/train/tokens_used": 2543596000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2389601469039917, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495524406433105, + "objective/train/weighted_lm_loss": 2.650407314300537, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9553355574607849, + "theoretical_loss": 3.36604980349032, + "tokens_seen": 2523136000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9754067347710936e-05, + "loss": 2.9571, + "theoretical_loss": 3.365966286805936, + "tokens_seen": 2523922432 + }, + { + "epoch": 0.9, + "learning_rate": 4.956488838441166e-05, + "loss": 2.9194, + "theoretical_loss": 3.365854983027151, + "tokens_seen": 2524971008 + }, + { + "epoch": 0.9, + "learning_rate": 4.937570942111237e-05, + "loss": 2.9091, + "theoretical_loss": 3.3657437383974456, + "tokens_seen": 2526019584 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.4876956641674042, + "objective/train/docs_used": 1420931, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1945478916168213, + "objective/train/original_loss": 3.1945481300354004, + "objective/train/theoretical_loss": 3.3657020368992527, + "objective/train/tokens_used": 2546872800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24205021560192108, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499999523162842, + "objective/train/weighted_lm_loss": 3.353463649749756, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9520066976547241, + "theoretical_loss": 3.3657020368992527, + "tokens_seen": 2526412800 + }, + { + "epoch": 0.9, + "learning_rate": 4.918653045781309e-05, + "loss": 2.9435, + "theoretical_loss": 3.3656325528608533, + "tokens_seen": 2527068160 + }, + { + "epoch": 0.9, + "learning_rate": 4.899735149451381e-05, + "loss": 2.9498, + "theoretical_loss": 3.365521426361483, + "tokens_seen": 2528116736 + }, + { + "epoch": 0.9, + "learning_rate": 4.8808172531214526e-05, + "loss": 2.9501, + "theoretical_loss": 3.365410358843522, + "tokens_seen": 2529165312 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.4870266616344452, + "objective/train/docs_used": 1422953, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5946903228759766, + "objective/train/original_loss": 2.5946898460388184, + "objective/train/theoretical_loss": 3.3653548471851478, + "objective/train/tokens_used": 2550149600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24236759543418884, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499347448349, + "objective/train/weighted_lm_loss": 2.725821018218994, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9607920050621033, + "theoretical_loss": 3.3653548471851478, + "tokens_seen": 2529689600 + }, + { + "epoch": 0.9, + "learning_rate": 4.861899356791525e-05, + "loss": 2.9142, + "theoretical_loss": 3.365299350251229, + "tokens_seen": 2530213888 + }, + { + "epoch": 0.9, + "learning_rate": 4.8429814604615973e-05, + "loss": 2.9477, + "theoretical_loss": 3.3651884005289423, + "tokens_seen": 2531262464 + }, + { + "epoch": 0.9, + "learning_rate": 4.824063564131669e-05, + "loss": 2.9652, + "theoretical_loss": 3.3650775096210745, + "tokens_seen": 2532311040 + }, + { + "epoch": 0.9, + "objective/train/advantage_avg": 0.47654762864112854, + "objective/train/docs_used": 1425467, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.940070629119873, + "objective/train/original_loss": 2.940070152282715, + "objective/train/theoretical_loss": 3.365008232645685, + "objective/train/tokens_used": 2553426400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24106010794639587, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048879623413086, + "objective/train/weighted_lm_loss": 3.083993434906006, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.9513778686523438, + "theoretical_loss": 3.365008232645685, + "tokens_seen": 2532966400 + }, + { + "epoch": 0.9, + "learning_rate": 4.805145667801741e-05, + "loss": 2.9732, + "theoretical_loss": 3.3649666774721134, + "tokens_seen": 2533359616 + }, + { + "epoch": 0.91, + "learning_rate": 4.786227771471813e-05, + "loss": 3.0567, + "theoretical_loss": 3.3648559040266224, + "tokens_seen": 2534408192 + }, + { + "epoch": 0.91, + "learning_rate": 4.767309875141884e-05, + "loss": 2.9618, + "theoretical_loss": 3.36474518922924, + "tokens_seen": 2535456768 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.45745569467544556, + "objective/train/docs_used": 1426787, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7048633098602295, + "objective/train/original_loss": 2.7048633098602295, + "objective/train/theoretical_loss": 3.3646621915857633, + "objective/train/tokens_used": 2556703200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22381484508514404, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0468831062316895, + "objective/train/weighted_lm_loss": 2.8340065479278564, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9606444239616394, + "theoretical_loss": 3.3646621915857633, + "tokens_seen": 2536243200 + }, + { + "epoch": 0.91, + "learning_rate": 4.748391978811956e-05, + "loss": 3.0131, + "theoretical_loss": 3.36463453302468, + "tokens_seen": 2536505344 + }, + { + "epoch": 0.91, + "learning_rate": 4.729474082482028e-05, + "loss": 2.9159, + "theoretical_loss": 3.364523935357731, + "tokens_seen": 2537553920 + }, + { + "epoch": 0.91, + "learning_rate": 4.7105561861521e-05, + "loss": 3.0107, + "theoretical_loss": 3.3644133961732567, + "tokens_seen": 2538602496 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.4876222312450409, + "objective/train/docs_used": 1428565, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.81592059135437, + "objective/train/original_loss": 2.815920352935791, + "objective/train/theoretical_loss": 3.3643167223174584, + "objective/train/tokens_used": 2559980000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24069343507289886, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499857664108276, + "objective/train/weighted_lm_loss": 2.9560835361480713, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9616897106170654, + "theoretical_loss": 3.3643167223174584, + "tokens_seen": 2539520000 + }, + { + "epoch": 0.91, + "learning_rate": 4.691638289822172e-05, + "loss": 2.977, + "theoretical_loss": 3.3643029154161948, + "tokens_seen": 2539651072 + }, + { + "epoch": 0.91, + "learning_rate": 4.672720393492244e-05, + "loss": 2.9811, + "theoretical_loss": 3.364192493031558, + "tokens_seen": 2540699648 + }, + { + "epoch": 0.91, + "learning_rate": 4.653802497162315e-05, + "loss": 2.9772, + "theoretical_loss": 3.3640821289644336, + "tokens_seen": 2541748224 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.48290324211120605, + "objective/train/docs_used": 1430679, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6714377403259277, + "objective/train/original_loss": 2.6714377403259277, + "objective/train/theoretical_loss": 3.363971823159983, + "objective/train/tokens_used": 2563256800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2380242496728897, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495001077651978, + "objective/train/weighted_lm_loss": 2.803065299987793, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.951354444026947, + "theoretical_loss": 3.363971823159983, + "tokens_seen": 2542796800 + }, + { + "epoch": 0.91, + "learning_rate": 4.634884600832388e-05, + "loss": 2.9188, + "theoretical_loss": 3.363971823159983, + "tokens_seen": 2542796800 + }, + { + "epoch": 0.91, + "learning_rate": 4.61596670450246e-05, + "loss": 2.9692, + "theoretical_loss": 3.363861575563442, + "tokens_seen": 2543845376 + }, + { + "epoch": 0.91, + "learning_rate": 4.5970488081725313e-05, + "loss": 3.0068, + "theoretical_loss": 3.363751386120119, + "tokens_seen": 2544893952 + }, + { + "epoch": 0.91, + "learning_rate": 4.5781309118426034e-05, + "loss": 2.921, + "theoretical_loss": 3.363641254775399, + "tokens_seen": 2545942528 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.4920687675476074, + "objective/train/docs_used": 1432418, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8956644535064697, + "objective/train/original_loss": 2.8956642150878906, + "objective/train/theoretical_loss": 3.3636274924396496, + "objective/train/tokens_used": 2566533600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2425273358821869, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050439715385437, + "objective/train/weighted_lm_loss": 3.042595863342285, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 1.0194487571716309, + "theoretical_loss": 3.3636274924396496, + "tokens_seen": 2546073600 + }, + { + "epoch": 0.91, + "learning_rate": 4.5592130155126755e-05, + "loss": 3.0193, + "theoretical_loss": 3.3635311814747384, + "tokens_seen": 2546991104 + }, + { + "epoch": 0.91, + "learning_rate": 4.540295119182747e-05, + "loss": 3.0138, + "theoretical_loss": 3.3634211661636675, + "tokens_seen": 2548039680 + }, + { + "epoch": 0.91, + "learning_rate": 4.521377222852819e-05, + "loss": 2.9616, + "theoretical_loss": 3.363311208787792, + "tokens_seen": 2549088256 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.4532369375228882, + "objective/train/docs_used": 1434243, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.017941474914551, + "objective/train/original_loss": 3.01794171333313, + "objective/train/theoretical_loss": 3.3632837284898294, + "objective/train/tokens_used": 2569810400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23174332082271576, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0465010404586792, + "objective/train/weighted_lm_loss": 3.1610023975372314, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.951921284198761, + "theoretical_loss": 3.3632837284898294, + "tokens_seen": 2549350400 + }, + { + "epoch": 0.91, + "learning_rate": 4.502459326522891e-05, + "loss": 3.0003, + "theoretical_loss": 3.363201309292788, + "tokens_seen": 2550136832 + }, + { + "epoch": 0.91, + "learning_rate": 4.483541430192962e-05, + "loss": 3.0131, + "theoretical_loss": 3.3630914676244075, + "tokens_seen": 2551185408 + }, + { + "epoch": 0.91, + "learning_rate": 4.4646235338630344e-05, + "loss": 3.0015, + "theoretical_loss": 3.3629816837284747, + "tokens_seen": 2552233984 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.4917280673980713, + "objective/train/docs_used": 1436345, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5419411659240723, + "objective/train/original_loss": 2.5419414043426514, + "objective/train/theoretical_loss": 3.362940529650914, + "objective/train/tokens_used": 2573087200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24304305016994476, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0504082441329956, + "objective/train/weighted_lm_loss": 2.669778823852539, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9585726261138916, + "theoretical_loss": 3.362940529650914, + "tokens_seen": 2552627200 + }, + { + "epoch": 0.91, + "learning_rate": 4.4457056375331064e-05, + "loss": 2.951, + "theoretical_loss": 3.362871957550886, + "tokens_seen": 2553282560 + }, + { + "epoch": 0.91, + "learning_rate": 4.4267877412031785e-05, + "loss": 2.9481, + "theoretical_loss": 3.3627622890376117, + "tokens_seen": 2554331136 + }, + { + "epoch": 0.91, + "learning_rate": 4.4078698448732505e-05, + "loss": 2.8988, + "theoretical_loss": 3.3626526781346944, + "tokens_seen": 2555379712 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.48716670274734497, + "objective/train/docs_used": 1438290, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6250998973846436, + "objective/train/original_loss": 2.6251001358032227, + "objective/train/theoretical_loss": 3.362597894270278, + "objective/train/tokens_used": 2576364000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23991335928440094, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499361753463745, + "objective/train/weighted_lm_loss": 2.756412982940674, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.956545352935791, + "theoretical_loss": 3.362597894270278, + "tokens_seen": 2555904000 + }, + { + "epoch": 0.91, + "learning_rate": 4.3889519485433226e-05, + "loss": 2.96, + "theoretical_loss": 3.3625431247882496, + "tokens_seen": 2556428288 + }, + { + "epoch": 0.91, + "learning_rate": 4.370034052213394e-05, + "loss": 2.9932, + "theoretical_loss": 3.3624336289444643, + "tokens_seen": 2557476864 + }, + { + "epoch": 0.91, + "learning_rate": 4.351116155883466e-05, + "loss": 3.0289, + "theoretical_loss": 3.3623241905495993, + "tokens_seen": 2558525440 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.4897761940956116, + "objective/train/docs_used": 1439355, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9495246410369873, + "objective/train/original_loss": 2.9495248794555664, + "objective/train/theoretical_loss": 3.362255820702239, + "objective/train/tokens_used": 2579640800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24264518916606903, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502110719680786, + "objective/train/weighted_lm_loss": 3.0971879959106445, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.957098126411438, + "theoretical_loss": 3.362255820702239, + "tokens_seen": 2559180800 + }, + { + "epoch": 0.91, + "learning_rate": 4.332198259553538e-05, + "loss": 2.9996, + "theoretical_loss": 3.3622148095499864, + "tokens_seen": 2559574016 + }, + { + "epoch": 0.91, + "learning_rate": 4.3132803632236095e-05, + "loss": 3.0646, + "theoretical_loss": 3.3621054858920303, + "tokens_seen": 2560622592 + }, + { + "epoch": 0.91, + "learning_rate": 4.2943624668936815e-05, + "loss": 3.001, + "theoretical_loss": 3.3619962195222075, + "tokens_seen": 2561671168 + }, + { + "epoch": 0.91, + "objective/train/advantage_avg": 0.48951423168182373, + "objective/train/docs_used": 1441205, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.010582447052002, + "objective/train/original_loss": 3.010582685470581, + "objective/train/theoretical_loss": 3.3619143073080204, + "objective/train/tokens_used": 2582917600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24158918857574463, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050179362297058, + "objective/train/weighted_lm_loss": 3.161499261856079, + "objective/train/weights_max": 1.0512157678604126, + "objective/train/weights_min": 0.9615952372550964, + "theoretical_loss": 3.3619143073080204, + "tokens_seen": 2562457600 + }, + { + "epoch": 0.92, + "learning_rate": 4.2754445705637536e-05, + "loss": 2.9713, + "theoretical_loss": 3.3618870103870657, + "tokens_seen": 2562719744 + }, + { + "epoch": 0.92, + "learning_rate": 4.256526674233825e-05, + "loss": 2.9824, + "theoretical_loss": 3.3617778584332254, + "tokens_seen": 2563768320 + }, + { + "epoch": 0.92, + "learning_rate": 4.237608777903897e-05, + "loss": 3.0308, + "theoretical_loss": 3.3616687636073777, + "tokens_seen": 2564816896 + }, + { + "epoch": 0.92, + "objective/train/advantage_avg": 0.4768475890159607, + "objective/train/docs_used": 1443158, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.762218713760376, + "objective/train/original_loss": 2.762218475341797, + "objective/train/theoretical_loss": 3.3615733524557143, + "objective/train/tokens_used": 2586194400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2334812730550766, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488715171813965, + "objective/train/weighted_lm_loss": 2.8970489501953125, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9876766204833984, + "theoretical_loss": 3.3615733524557143, + "tokens_seen": 2565734400 + }, + { + "epoch": 0.92, + "learning_rate": 4.21869088157397e-05, + "loss": 2.9873, + "theoretical_loss": 3.3615597258562855, + "tokens_seen": 2565865472 + }, + { + "epoch": 0.92, + "learning_rate": 4.199772985244041e-05, + "loss": 2.9843, + "theoretical_loss": 3.3614507451267834, + "tokens_seen": 2566914048 + }, + { + "epoch": 0.92, + "learning_rate": 4.180855088914113e-05, + "loss": 2.954, + "theoretical_loss": 3.361341821365777, + "tokens_seen": 2567962624 + }, + { + "epoch": 0.92, + "objective/train/advantage_avg": 0.4460161030292511, + "objective/train/docs_used": 1445048, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.046217441558838, + "objective/train/original_loss": 3.046217441558838, + "objective/train/theoretical_loss": 3.3612329545202426, + "objective/train/tokens_used": 2589471200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23430149257183075, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0457913875579834, + "objective/train/weighted_lm_loss": 3.190871238708496, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.9551336169242859, + "theoretical_loss": 3.3612329545202426, + "tokens_seen": 2569011200 + }, + { + "epoch": 0.92, + "learning_rate": 4.1619371925841845e-05, + "loss": 2.9734, + "theoretical_loss": 3.3612329545202426, + "tokens_seen": 2569011200 + }, + { + "epoch": 0.92, + "learning_rate": 4.1430192962542566e-05, + "loss": 2.9417, + "theoretical_loss": 3.361124144537228, + "tokens_seen": 2570059776 + }, + { + "epoch": 0.92, + "learning_rate": 4.1241013999243287e-05, + "loss": 2.9385, + "theoretical_loss": 3.361015391363852, + "tokens_seen": 2571108352 + }, + { + "epoch": 0.92, + "learning_rate": 4.1051835035944e-05, + "loss": 3.0063, + "theoretical_loss": 3.360906694947303, + "tokens_seen": 2572156928 + }, + { + "epoch": 0.92, + "objective/train/advantage_avg": 0.48974141478538513, + "objective/train/docs_used": 1447122, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.636260986328125, + "objective/train/original_loss": 2.636261224746704, + "objective/train/theoretical_loss": 3.360893111883321, + "objective/train/tokens_used": 2592748000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2414124757051468, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050201177597046, + "objective/train/weighted_lm_loss": 2.769242763519287, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9594744443893433, + "theoretical_loss": 3.360893111883321, + "tokens_seen": 2572288000 + }, + { + "epoch": 0.92, + "learning_rate": 4.086265607264472e-05, + "loss": 2.9561, + "theoretical_loss": 3.360798055234841, + "tokens_seen": 2573205504 + }, + { + "epoch": 0.92, + "learning_rate": 4.067347710934544e-05, + "loss": 2.9818, + "theoretical_loss": 3.3606894721737968, + "tokens_seen": 2574254080 + }, + { + "epoch": 0.92, + "learning_rate": 4.0484298146046155e-05, + "loss": 2.9962, + "theoretical_loss": 3.36058094571157, + "tokens_seen": 2575302656 + }, + { + "epoch": 0.92, + "objective/train/advantage_avg": 0.49044662714004517, + "objective/train/docs_used": 1449033, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7619566917419434, + "objective/train/original_loss": 2.7619566917419434, + "objective/train/theoretical_loss": 3.3605538229334218, + "objective/train/tokens_used": 2596024800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24158285558223724, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502727031707764, + "objective/train/weighted_lm_loss": 2.901848793029785, + "objective/train/weights_max": 1.0512210130691528, + "objective/train/weights_min": 0.9779994487762451, + "theoretical_loss": 3.3605538229334218, + "tokens_seen": 2575564800 + }, + { + "epoch": 0.92, + "learning_rate": 4.0295119182746876e-05, + "loss": 3.0442, + "theoretical_loss": 3.360472475795633, + "tokens_seen": 2576351232 + }, + { + "epoch": 0.92, + "learning_rate": 4.0105940219447596e-05, + "loss": 3.0068, + "theoretical_loss": 3.3603640623735247, + "tokens_seen": 2577399808 + }, + { + "epoch": 0.92, + "learning_rate": 3.991676125614832e-05, + "loss": 3.0617, + "theoretical_loss": 3.360255705392857, + "tokens_seen": 2578448384 + }, + { + "epoch": 0.92, + "objective/train/advantage_avg": 0.4713362753391266, + "objective/train/docs_used": 1450816, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0295400619506836, + "objective/train/original_loss": 3.0295395851135254, + "objective/train/theoretical_loss": 3.360215086065735, + "objective/train/tokens_used": 2599301600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23452427983283997, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0483254194259644, + "objective/train/weighted_lm_loss": 3.175776958465576, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9520061016082764, + "theoretical_loss": 3.360215086065735, + "tokens_seen": 2578841600 + }, + { + "epoch": 0.92, + "learning_rate": 3.972758229284904e-05, + "loss": 2.9335, + "theoretical_loss": 3.3601474048013107, + "tokens_seen": 2579496960 + }, + { + "epoch": 0.92, + "learning_rate": 3.953840332954976e-05, + "loss": 2.9984, + "theoretical_loss": 3.3600391605466364, + "tokens_seen": 2580545536 + }, + { + "epoch": 0.92, + "learning_rate": 3.934922436625047e-05, + "loss": 2.9435, + "theoretical_loss": 3.359930972576654, + "tokens_seen": 2581594112 + }, + { + "epoch": 0.92, + "objective/train/advantage_avg": 0.4855765104293823, + "objective/train/docs_used": 1453056, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.05781626701355, + "objective/train/original_loss": 3.0578160285949707, + "objective/train/theoretical_loss": 3.359876899682135, + "objective/train/tokens_used": 2602578400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24145200848579407, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497848987579346, + "objective/train/weighted_lm_loss": 3.208839178085327, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9515539407730103, + "theoretical_loss": 3.359876899682135, + "tokens_seen": 2582118400 + }, + { + "epoch": 0.92, + "learning_rate": 3.916004540295119e-05, + "loss": 2.9645, + "theoretical_loss": 3.359822840839253, + "tokens_seen": 2582642688 + }, + { + "epoch": 0.92, + "learning_rate": 3.897086643965191e-05, + "loss": 2.9902, + "theoretical_loss": 3.359714765282393, + "tokens_seen": 2583691264 + }, + { + "epoch": 0.92, + "learning_rate": 3.8781687476352627e-05, + "loss": 3.037, + "theoretical_loss": 3.3596067458541015, + "tokens_seen": 2584739840 + }, + { + "epoch": 0.92, + "objective/train/advantage_avg": 0.4761214852333069, + "objective/train/docs_used": 1455160, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8679680824279785, + "objective/train/original_loss": 2.8679680824279785, + "objective/train/theoretical_loss": 3.3595392621911433, + "objective/train/tokens_used": 2605855200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24079108238220215, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0488356351852417, + "objective/train/weighted_lm_loss": 3.006171226501465, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9513256549835205, + "theoretical_loss": 3.3595392621911433, + "tokens_seen": 2585395200 + }, + { + "epoch": 0.92, + "learning_rate": 3.859250851305335e-05, + "loss": 3.0005, + "theoretical_loss": 3.3594987825024765, + "tokens_seen": 2585788416 + }, + { + "epoch": 0.92, + "learning_rate": 3.840332954975407e-05, + "loss": 3.0012, + "theoretical_loss": 3.359390875175684, + "tokens_seen": 2586836992 + }, + { + "epoch": 0.92, + "learning_rate": 3.821415058645478e-05, + "loss": 3.018, + "theoretical_loss": 3.3592830238219595, + "tokens_seen": 2587885568 + }, + { + "epoch": 0.92, + "objective/train/advantage_avg": 0.49262744188308716, + "objective/train/docs_used": 1457167, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8069980144500732, + "objective/train/original_loss": 2.8069982528686523, + "objective/train/theoretical_loss": 3.359202172007891, + "objective/train/tokens_used": 2609132000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24417226016521454, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0505039691925049, + "objective/train/weighted_lm_loss": 2.9489877223968506, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9552015066146851, + "theoretical_loss": 3.359202172007891, + "tokens_seen": 2588672000 + }, + { + "epoch": 0.92, + "learning_rate": 3.80249716231555e-05, + "loss": 3.0026, + "theoretical_loss": 3.359175228389607, + "tokens_seen": 2588934144 + }, + { + "epoch": 0.93, + "learning_rate": 3.783579265985623e-05, + "loss": 3.021, + "theoretical_loss": 3.359067488826999, + "tokens_seen": 2589982720 + }, + { + "epoch": 0.93, + "learning_rate": 3.764661369655694e-05, + "loss": 2.9914, + "theoretical_loss": 3.3589598050825775, + "tokens_seen": 2591031296 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.4884391129016876, + "objective/train/docs_used": 1459162, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.828636884689331, + "objective/train/original_loss": 2.82863712310791, + "objective/train/theoretical_loss": 3.3588656275540845, + "objective/train/tokens_used": 2612408800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24153073132038116, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500717163085938, + "objective/train/weighted_lm_loss": 2.9701390266418457, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9631984829902649, + "theoretical_loss": 3.3588656275540845, + "tokens_seen": 2591948800 + }, + { + "epoch": 0.93, + "learning_rate": 3.7457434733257664e-05, + "loss": 3.0185, + "theoretical_loss": 3.3588521771048514, + "tokens_seen": 2592079872 + }, + { + "epoch": 0.93, + "learning_rate": 3.7268255769958384e-05, + "loss": 3.0217, + "theoretical_loss": 3.3587446048423995, + "tokens_seen": 2593128448 + }, + { + "epoch": 0.93, + "learning_rate": 3.70790768066591e-05, + "loss": 2.9578, + "theoretical_loss": 3.358637088243867, + "tokens_seen": 2594177024 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.48765861988067627, + "objective/train/docs_used": 1460817, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.20979380607605, + "objective/train/original_loss": 3.20979380607605, + "objective/train/theoretical_loss": 3.3585296272579694, + "objective/train/tokens_used": 2615685600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2429957538843155, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500011444091797, + "objective/train/weighted_lm_loss": 3.370309352874756, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9553640484809875, + "theoretical_loss": 3.3585296272579694, + "tokens_seen": 2595225600 + }, + { + "epoch": 0.93, + "learning_rate": 3.688989784335982e-05, + "loss": 3.0662, + "theoretical_loss": 3.3585296272579694, + "tokens_seen": 2595225600 + }, + { + "epoch": 0.93, + "learning_rate": 3.670071888006054e-05, + "loss": 3.0268, + "theoretical_loss": 3.358422221833488, + "tokens_seen": 2596274176 + }, + { + "epoch": 0.93, + "learning_rate": 3.651153991676125e-05, + "loss": 3.0209, + "theoretical_loss": 3.358314871919273, + "tokens_seen": 2597322752 + }, + { + "epoch": 0.93, + "learning_rate": 3.632236095346197e-05, + "loss": 3.0275, + "theoretical_loss": 3.3582075774642424, + "tokens_seen": 2598371328 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.4868325889110565, + "objective/train/docs_used": 1462730, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.88214373588562, + "objective/train/original_loss": 2.88214373588562, + "objective/train/theoretical_loss": 3.358194169554296, + "objective/train/tokens_used": 2618962400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24065445363521576, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049906611442566, + "objective/train/weighted_lm_loss": 3.026564598083496, + "objective/train/weights_max": 1.0512189865112305, + "objective/train/weights_min": 0.9531733989715576, + "theoretical_loss": 3.358194169554296, + "tokens_seen": 2598502400 + }, + { + "epoch": 0.93, + "learning_rate": 3.6133181990162694e-05, + "loss": 3.0284, + "theoretical_loss": 3.358100338417381, + "tokens_seen": 2599419904 + }, + { + "epoch": 0.93, + "learning_rate": 3.594400302686341e-05, + "loss": 3.1054, + "theoretical_loss": 3.3579931547277426, + "tokens_seen": 2600468480 + }, + { + "epoch": 0.93, + "learning_rate": 3.575482406356413e-05, + "loss": 3.092, + "theoretical_loss": 3.3578860263444463, + "tokens_seen": 2601517056 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.4892682433128357, + "objective/train/docs_used": 1464443, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9992620944976807, + "objective/train/original_loss": 2.9992618560791016, + "objective/train/theoretical_loss": 3.3578592528842823, + "objective/train/tokens_used": 2622239200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24054360389709473, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0501495599746704, + "objective/train/weighted_lm_loss": 3.1498708724975586, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9776185154914856, + "theoretical_loss": 3.3578592528842823, + "tokens_seen": 2601779200 + }, + { + "epoch": 0.93, + "learning_rate": 3.5565645100264856e-05, + "loss": 3.0732, + "theoretical_loss": 3.3577789532166804, + "tokens_seen": 2602565632 + }, + { + "epoch": 0.93, + "learning_rate": 3.537646613696557e-05, + "loss": 3.0749, + "theoretical_loss": 3.3576719352936992, + "tokens_seen": 2603614208 + }, + { + "epoch": 0.93, + "learning_rate": 3.518728717366629e-05, + "loss": 3.0803, + "theoretical_loss": 3.357564972524824, + "tokens_seen": 2604662784 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.49427875876426697, + "objective/train/docs_used": 1466332, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.944730043411255, + "objective/train/original_loss": 2.944730043411255, + "objective/train/theoretical_loss": 3.357524875695582, + "objective/train/tokens_used": 2625516000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24559368193149567, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506764650344849, + "objective/train/weighted_lm_loss": 3.093897819519043, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9565245509147644, + "theoretical_loss": 3.357524875695582, + "tokens_seen": 2605056000 + }, + { + "epoch": 0.93, + "learning_rate": 3.499810821036701e-05, + "loss": 3.0559, + "theoretical_loss": 3.357458064859444, + "tokens_seen": 2605711360 + }, + { + "epoch": 0.93, + "learning_rate": 3.4808929247067724e-05, + "loss": 3.0196, + "theoretical_loss": 3.3573512122470137, + "tokens_seen": 2606759936 + }, + { + "epoch": 0.93, + "learning_rate": 3.4619750283768445e-05, + "loss": 3.1101, + "theoretical_loss": 3.3572444146370555, + "tokens_seen": 2607808512 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.48803937435150146, + "objective/train/docs_used": 1468043, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.059133529663086, + "objective/train/original_loss": 3.0591330528259277, + "objective/train/theoretical_loss": 3.357191036442247, + "objective/train/tokens_used": 2628792800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2422197312116623, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500352382659912, + "objective/train/weighted_lm_loss": 3.2130544185638428, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9526868462562561, + "theoretical_loss": 3.357191036442247, + "tokens_seen": 2608332800 + }, + { + "epoch": 0.93, + "learning_rate": 3.4430571320469165e-05, + "loss": 3.0649, + "theoretical_loss": 3.3571376719791575, + "tokens_seen": 2608857088 + }, + { + "epoch": 0.93, + "learning_rate": 3.424139235716988e-05, + "loss": 3.0104, + "theoretical_loss": 3.357030984222975, + "tokens_seen": 2609905664 + }, + { + "epoch": 0.93, + "learning_rate": 3.40522133938706e-05, + "loss": 3.0704, + "theoretical_loss": 3.3569243513182294, + "tokens_seen": 2610954240 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.49350228905677795, + "objective/train/docs_used": 1469063, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5382068157196045, + "objective/train/original_loss": 2.5382070541381836, + "objective/train/theoretical_loss": 3.356857733584695, + "objective/train/tokens_used": 2632069600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24541838467121124, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050597906112671, + "objective/train/weighted_lm_loss": 2.6664652824401855, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.952812910079956, + "theoretical_loss": 3.356857733584695, + "tokens_seen": 2611609600 + }, + { + "epoch": 0.93, + "learning_rate": 3.386303443057132e-05, + "loss": 3.0581, + "theoretical_loss": 3.356817773214708, + "tokens_seen": 2612002816 + }, + { + "epoch": 0.93, + "learning_rate": 3.3673855467272034e-05, + "loss": 3.0308, + "theoretical_loss": 3.3567112498622644, + "tokens_seen": 2613051392 + }, + { + "epoch": 0.93, + "learning_rate": 3.348467650397276e-05, + "loss": 3.058, + "theoretical_loss": 3.3566047812108186, + "tokens_seen": 2614099968 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.49121448397636414, + "objective/train/docs_used": 1470949, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.566678285598755, + "objective/train/original_loss": 2.566678047180176, + "objective/train/theoretical_loss": 3.356524965589674, + "objective/train/tokens_used": 2635346400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24299617111682892, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503567457199097, + "objective/train/weighted_lm_loss": 2.6957244873046875, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9728997945785522, + "theoretical_loss": 3.356524965589674, + "tokens_seen": 2614886400 + }, + { + "epoch": 0.93, + "learning_rate": 3.329549754067348e-05, + "loss": 3.0031, + "theoretical_loss": 3.3564983672103548, + "tokens_seen": 2615148544 + }, + { + "epoch": 0.93, + "learning_rate": 3.3106318577374196e-05, + "loss": 3.0581, + "theoretical_loss": 3.3563920078109257, + "tokens_seen": 2616197120 + }, + { + "epoch": 0.93, + "learning_rate": 3.2917139614074916e-05, + "loss": 3.0728, + "theoretical_loss": 3.3562857029626474, + "tokens_seen": 2617245696 + }, + { + "epoch": 0.93, + "objective/train/advantage_avg": 0.4815143346786499, + "objective/train/docs_used": 1472903, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8542768955230713, + "objective/train/original_loss": 2.854276657104492, + "objective/train/theoretical_loss": 3.35619273093023, + "objective/train/tokens_used": 2638623200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23790426552295685, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493605136871338, + "objective/train/weighted_lm_loss": 2.994652509689331, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.952725350856781, + "theoretical_loss": 3.35619273093023, + "tokens_seen": 2618163200 + }, + { + "epoch": 0.94, + "learning_rate": 3.272796065077564e-05, + "loss": 3.1399, + "theoretical_loss": 3.3561794526157023, + "tokens_seen": 2618294272 + }, + { + "epoch": 0.94, + "learning_rate": 3.253878168747635e-05, + "loss": 3.0911, + "theoretical_loss": 3.356073256720338, + "tokens_seen": 2619342848 + }, + { + "epoch": 0.94, + "learning_rate": 3.234960272417707e-05, + "loss": 3.15, + "theoretical_loss": 3.3559671152268686, + "tokens_seen": 2620391424 + }, + { + "debugging/Self-BLEU-5": 0.449855913696806, + "debugging/distinct-1-grams": 0.769778305351557, + "debugging/distinct-2-grams": 0.9467712904639874, + "debugging/entropy-1-grams": 5.998633443012235, + "debugging/entropy-2-grams": 6.951908950814323, + "debugging/length": 469.11764705882354, + "debugging/num_segments": 17, + "debugging/raw_token_scores_avg": 0.039095163345336914, + "debugging/raw_token_scores_std": 0.11093362420797348, + "epoch": 0.94, + "objective/train/advantage_avg": 0.46091702580451965, + "objective/train/docs_used": 1474904, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9136695861816406, + "objective/train/original_loss": 2.9136695861816406, + "objective/train/theoretical_loss": 3.3558610280856715, + "objective/train/tokens_used": 2641900000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22473308444023132, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0472337007522583, + "objective/train/weighted_lm_loss": 3.0515451431274414, + "objective/train/weights_max": 1.0512163639068604, + "objective/train/weights_min": 0.9528725147247314, + "theoretical_loss": 3.3558610280856715, + "tokens_seen": 2621440000 + }, + { + "epoch": 0.94, + "learning_rate": 3.216042376087779e-05, + "loss": 3.0863, + "theoretical_loss": 3.3558610280856715, + "tokens_seen": 2621440000 + }, + { + "epoch": 0.94, + "learning_rate": 3.1971244797578505e-05, + "loss": 3.0977, + "theoretical_loss": 3.3557549952471906, + "tokens_seen": 2622488576 + }, + { + "epoch": 0.94, + "learning_rate": 3.1782065834279226e-05, + "loss": 3.0837, + "theoretical_loss": 3.3556490166619337, + "tokens_seen": 2623537152 + }, + { + "epoch": 0.94, + "learning_rate": 3.1592886870979946e-05, + "loss": 3.1327, + "theoretical_loss": 3.3555430922804743, + "tokens_seen": 2624585728 + }, + { + "epoch": 0.94, + "objective/train/advantage_avg": 0.48707959055900574, + "objective/train/docs_used": 1477123, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9778811931610107, + "objective/train/original_loss": 2.97788143157959, + "objective/train/theoretical_loss": 3.3555298555415374, + "objective/train/tokens_used": 2645176800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23922058939933777, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499240159988403, + "objective/train/weighted_lm_loss": 3.126800298690796, + "objective/train/weights_max": 1.0512198209762573, + "objective/train/weights_min": 1.0077565908432007, + "theoretical_loss": 3.3555298555415374, + "tokens_seen": 2624716800 + }, + { + "epoch": 0.94, + "learning_rate": 3.140370790768066e-05, + "loss": 3.0617, + "theoretical_loss": 3.3554372220534505, + "tokens_seen": 2625634304 + }, + { + "epoch": 0.94, + "learning_rate": 3.121452894438139e-05, + "loss": 3.0449, + "theoretical_loss": 3.3553314059315653, + "tokens_seen": 2626682880 + }, + { + "epoch": 0.94, + "learning_rate": 3.102534998108211e-05, + "loss": 3.0209, + "theoretical_loss": 3.3552256438655856, + "tokens_seen": 2627731456 + }, + { + "epoch": 0.94, + "objective/train/advantage_avg": 0.4856823980808258, + "objective/train/docs_used": 1478969, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.793982982635498, + "objective/train/original_loss": 2.793982982635498, + "objective/train/theoretical_loss": 3.3551992117895626, + "objective/train/tokens_used": 2648453600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2396468073129654, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497863292694092, + "objective/train/weighted_lm_loss": 2.9343104362487793, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9753270745277405, + "theoretical_loss": 3.3551992117895626, + "tokens_seen": 2627993600 + }, + { + "epoch": 0.94, + "learning_rate": 3.083617101778282e-05, + "loss": 3.081, + "theoretical_loss": 3.355119935806343, + "tokens_seen": 2628780032 + }, + { + "epoch": 0.94, + "learning_rate": 3.064699205448354e-05, + "loss": 3.0635, + "theoretical_loss": 3.3550142817047335, + "tokens_seen": 2629828608 + }, + { + "epoch": 0.94, + "learning_rate": 3.0457813091184263e-05, + "loss": 3.03, + "theoretical_loss": 3.3549086815117164, + "tokens_seen": 2630877184 + }, + { + "epoch": 0.94, + "objective/train/advantage_avg": 0.48200660943984985, + "objective/train/docs_used": 1481036, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2332446575164795, + "objective/train/original_loss": 3.2332448959350586, + "objective/train/theoretical_loss": 3.3548690953276465, + "objective/train/tokens_used": 2651730400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2395787388086319, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049418330192566, + "objective/train/weighted_lm_loss": 3.3921494483947754, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9518641233444214, + "theoretical_loss": 3.3548690953276465, + "tokens_seen": 2631270400 + }, + { + "epoch": 0.94, + "learning_rate": 3.026863412788498e-05, + "loss": 3.0466, + "theoretical_loss": 3.3548031351783174, + "tokens_seen": 2631925760 + }, + { + "epoch": 0.94, + "learning_rate": 3.0079455164585697e-05, + "loss": 3.0208, + "theoretical_loss": 3.3546976426556236, + "tokens_seen": 2632974336 + }, + { + "epoch": 0.94, + "learning_rate": 2.9890276201286418e-05, + "loss": 2.9977, + "theoretical_loss": 3.3545922038947875, + "tokens_seen": 2634022912 + }, + { + "epoch": 0.94, + "objective/train/advantage_avg": 0.4820900857448578, + "objective/train/docs_used": 1482195, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2531821727752686, + "objective/train/original_loss": 3.2531819343566895, + "objective/train/theoretical_loss": 3.3545395046598183, + "objective/train/tokens_used": 2655007200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23769210278987885, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494171380996704, + "objective/train/weighted_lm_loss": 3.413717031478882, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9524181485176086, + "theoretical_loss": 3.3545395046598183, + "tokens_seen": 2634547200 + }, + { + "epoch": 0.94, + "learning_rate": 2.9701097237987138e-05, + "loss": 3.0562, + "theoretical_loss": 3.3544868188470245, + "tokens_seen": 2635071488 + }, + { + "epoch": 0.94, + "learning_rate": 2.9511918274687855e-05, + "loss": 3.0351, + "theoretical_loss": 3.354381487463615, + "tokens_seen": 2636120064 + }, + { + "epoch": 0.94, + "learning_rate": 2.9322739311388576e-05, + "loss": 3.0414, + "theoretical_loss": 3.3542762096959007, + "tokens_seen": 2637168640 + }, + { + "epoch": 0.94, + "objective/train/advantage_avg": 0.48411837220191956, + "objective/train/docs_used": 1484156, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8459889888763428, + "objective/train/original_loss": 2.8459887504577637, + "objective/train/theoretical_loss": 3.3542104382962057, + "objective/train/tokens_used": 2658284000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23963847756385803, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049630045890808, + "objective/train/weighted_lm_loss": 2.9878950119018555, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9651797413825989, + "theoretical_loss": 3.3542104382962057, + "tokens_seen": 2637824000 + }, + { + "epoch": 0.94, + "learning_rate": 2.9133560348089293e-05, + "loss": 3.0507, + "theoretical_loss": 3.3541709854952892, + "tokens_seen": 2638217216 + }, + { + "epoch": 0.94, + "learning_rate": 2.894438138479001e-05, + "loss": 3.0462, + "theoretical_loss": 3.35406581481325, + "tokens_seen": 2639265792 + }, + { + "epoch": 0.94, + "learning_rate": 2.875520242149073e-05, + "loss": 3.0154, + "theoretical_loss": 3.353960697601316, + "tokens_seen": 2640314368 + }, + { + "epoch": 0.94, + "objective/train/advantage_avg": 0.48868516087532043, + "objective/train/docs_used": 1485777, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8094425201416016, + "objective/train/original_loss": 2.8094422817230225, + "objective/train/theoretical_loss": 3.353881894753002, + "objective/train/tokens_used": 2661560800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24112290143966675, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500941276550293, + "objective/train/weighted_lm_loss": 2.9495749473571777, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9579086303710938, + "theoretical_loss": 3.353881894753002, + "tokens_seen": 2641100800 + }, + { + "epoch": 0.94, + "learning_rate": 2.856602345819145e-05, + "loss": 3.0786, + "theoretical_loss": 3.353855633811084, + "tokens_seen": 2641362944 + }, + { + "epoch": 0.94, + "learning_rate": 2.837684449489217e-05, + "loss": 3.0674, + "theoretical_loss": 3.3537506233942116, + "tokens_seen": 2642411520 + }, + { + "epoch": 0.94, + "learning_rate": 2.818766553159289e-05, + "loss": 3.0164, + "theoretical_loss": 3.353645666302423, + "tokens_seen": 2643460096 + }, + { + "epoch": 0.94, + "objective/train/advantage_avg": 0.48162642121315, + "objective/train/docs_used": 1488108, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0382301807403564, + "objective/train/original_loss": 3.0382299423217773, + "objective/train/theoretical_loss": 3.353553872552434, + "objective/train/tokens_used": 2664837600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23907937109470367, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0493777990341187, + "objective/train/weighted_lm_loss": 3.186624050140381, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9515630006790161, + "theoretical_loss": 3.353553872552434, + "tokens_seen": 2644377600 + }, + { + "epoch": 0.94, + "learning_rate": 2.7998486568293606e-05, + "loss": 3.0749, + "theoretical_loss": 3.3535407624875013, + "tokens_seen": 2644508672 + }, + { + "epoch": 0.94, + "learning_rate": 2.7809307604994323e-05, + "loss": 3.1002, + "theoretical_loss": 3.3534359119012946, + "tokens_seen": 2645557248 + }, + { + "epoch": 0.95, + "learning_rate": 2.7620128641695044e-05, + "loss": 3.073, + "theoretical_loss": 3.3533311144957136, + "tokens_seen": 2646605824 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.4883487820625305, + "objective/train/docs_used": 1490143, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5410468578338623, + "objective/train/original_loss": 2.541046619415283, + "objective/train/theoretical_loss": 3.3532263702227305, + "objective/train/tokens_used": 2668114400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24362722039222717, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500731468200684, + "objective/train/weighted_lm_loss": 2.6677608489990234, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9517417550086975, + "theoretical_loss": 3.3532263702227305, + "tokens_seen": 2647654400 + }, + { + "epoch": 0.95, + "learning_rate": 2.7430949678395765e-05, + "loss": 3.0142, + "theoretical_loss": 3.3532263702227305, + "tokens_seen": 2647654400 + }, + { + "epoch": 0.95, + "learning_rate": 2.724177071509648e-05, + "loss": 3.0453, + "theoretical_loss": 3.3531216790343805, + "tokens_seen": 2648702976 + }, + { + "epoch": 0.95, + "learning_rate": 2.7052591751797202e-05, + "loss": 3.0265, + "theoretical_loss": 3.35301704088276, + "tokens_seen": 2649751552 + }, + { + "epoch": 0.95, + "learning_rate": 2.686341278849792e-05, + "loss": 3.0057, + "theoretical_loss": 3.3529124557200296, + "tokens_seen": 2650800128 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.47749945521354675, + "objective/train/docs_used": 1492100, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0875096321105957, + "objective/train/original_loss": 3.0875096321105957, + "objective/train/theoretical_loss": 3.35289938629809, + "objective/train/tokens_used": 2671391200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24302785098552704, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489848852157593, + "objective/train/weighted_lm_loss": 3.23630428314209, + "objective/train/weights_max": 1.0512157678604126, + "objective/train/weights_min": 0.9513412117958069, + "theoretical_loss": 3.35289938629809, + "tokens_seen": 2650931200 + }, + { + "epoch": 0.95, + "learning_rate": 2.6674233825198637e-05, + "loss": 3.0612, + "theoretical_loss": 3.3528079234984105, + "tokens_seen": 2651848704 + }, + { + "epoch": 0.95, + "learning_rate": 2.648505486189936e-05, + "loss": 3.0719, + "theoretical_loss": 3.352703444170186, + "tokens_seen": 2652897280 + }, + { + "epoch": 0.95, + "learning_rate": 2.6295875898600078e-05, + "loss": 2.9922, + "theoretical_loss": 3.3525990176877007, + "tokens_seen": 2653945856 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.4898962080478668, + "objective/train/docs_used": 1494002, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.18452787399292, + "objective/train/original_loss": 3.184528350830078, + "objective/train/theoretical_loss": 3.3525729193186478, + "objective/train/tokens_used": 2674668000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24191275238990784, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502192974090576, + "objective/train/weighted_lm_loss": 3.344353675842285, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.9598506689071655, + "theoretical_loss": 3.3525729193186478, + "tokens_seen": 2654208000 + }, + { + "epoch": 0.95, + "learning_rate": 2.6106696935300795e-05, + "loss": 3.0289, + "theoretical_loss": 3.3524946440033627, + "tokens_seen": 2654994432 + }, + { + "epoch": 0.95, + "learning_rate": 2.5917517972001515e-05, + "loss": 3.0558, + "theoretical_loss": 3.35239032306964, + "tokens_seen": 2656043008 + }, + { + "epoch": 0.95, + "learning_rate": 2.5728339008702233e-05, + "loss": 3.0116, + "theoretical_loss": 3.352286054839063, + "tokens_seen": 2657091584 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.4882798194885254, + "objective/train/docs_used": 1495727, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.735853433609009, + "objective/train/original_loss": 2.7358531951904297, + "objective/train/theoretical_loss": 3.3522469678304483, + "objective/train/tokens_used": 2677944800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24260863661766052, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0500612258911133, + "objective/train/weighted_lm_loss": 2.8720898628234863, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9521510004997253, + "theoretical_loss": 3.3522469678304483, + "tokens_seen": 2657484800 + }, + { + "epoch": 0.95, + "learning_rate": 2.553916004540295e-05, + "loss": 3.0711, + "theoretical_loss": 3.3521818392642233, + "tokens_seen": 2658140160 + }, + { + "epoch": 0.95, + "learning_rate": 2.5349981082103674e-05, + "loss": 2.9868, + "theoretical_loss": 3.3520776762977738, + "tokens_seen": 2659188736 + }, + { + "epoch": 0.95, + "learning_rate": 2.516080211880439e-05, + "loss": 3.0218, + "theoretical_loss": 3.3519735658924286, + "tokens_seen": 2660237312 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.4827539622783661, + "objective/train/docs_used": 1497686, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5873148441314697, + "objective/train/original_loss": 2.5873146057128906, + "objective/train/theoretical_loss": 3.35192153038541, + "objective/train/tokens_used": 2681221600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23605278134346008, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049475073814392, + "objective/train/weighted_lm_loss": 2.7146008014678955, + "objective/train/weights_max": 1.051217794418335, + "objective/train/weights_min": 0.9633801579475403, + "theoretical_loss": 3.35192153038541, + "tokens_seen": 2660761600 + }, + { + "epoch": 0.95, + "learning_rate": 2.4971623155505108e-05, + "loss": 3.0077, + "theoretical_loss": 3.3518695080009633, + "tokens_seen": 2661285888 + }, + { + "epoch": 0.95, + "learning_rate": 2.478244419220583e-05, + "loss": 3.0271, + "theoretical_loss": 3.351765502576214, + "tokens_seen": 2662334464 + }, + { + "epoch": 0.95, + "learning_rate": 2.4593265228906546e-05, + "loss": 3.0663, + "theoretical_loss": 3.3516615495710775, + "tokens_seen": 2663383040 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.493733286857605, + "objective/train/docs_used": 1499388, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9322574138641357, + "objective/train/original_loss": 2.932257652282715, + "objective/train/theoretical_loss": 3.351596605541298, + "objective/train/tokens_used": 2684498400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2454751431941986, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506212711334229, + "objective/train/weighted_lm_loss": 3.0805580615997314, + "objective/train/weights_max": 1.051218867301941, + "objective/train/weights_min": 0.9514515995979309, + "theoretical_loss": 3.351596605541298, + "tokens_seen": 2664038400 + }, + { + "epoch": 0.95, + "learning_rate": 2.4404086265607263e-05, + "loss": 3.0406, + "theoretical_loss": 3.351557648938513, + "tokens_seen": 2664431616 + }, + { + "epoch": 0.95, + "learning_rate": 2.4214907302307987e-05, + "loss": 3.0289, + "theoretical_loss": 3.351453800631538, + "tokens_seen": 2665480192 + }, + { + "epoch": 0.95, + "learning_rate": 2.4025728339008704e-05, + "loss": 3.0233, + "theoretical_loss": 3.3513500046032325, + "tokens_seen": 2666528768 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.4782050549983978, + "objective/train/docs_used": 1500922, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9047739505767822, + "objective/train/original_loss": 2.904773712158203, + "objective/train/theoretical_loss": 3.351272191861688, + "objective/train/tokens_used": 2687775200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2350698709487915, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490152835845947, + "objective/train/weighted_lm_loss": 3.0477681159973145, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9560204148292542, + "theoretical_loss": 3.351272191861688, + "tokens_seen": 2667315200 + }, + { + "epoch": 0.95, + "learning_rate": 2.383654937570942e-05, + "loss": 3.0389, + "theoretical_loss": 3.351246260806736, + "tokens_seen": 2667577344 + }, + { + "epoch": 0.95, + "learning_rate": 2.364737041241014e-05, + "loss": 3.0276, + "theoretical_loss": 3.3511425691952486, + "tokens_seen": 2668625920 + }, + { + "epoch": 0.95, + "learning_rate": 2.345819144911086e-05, + "loss": 3.0211, + "theoretical_loss": 3.3510389297220318, + "tokens_seen": 2669674496 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.47957465052604675, + "objective/train/docs_used": 1502841, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7510986328125, + "objective/train/original_loss": 2.7510986328125, + "objective/train/theoretical_loss": 3.350948287915944, + "objective/train/tokens_used": 2691052000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23619519174098969, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049157977104187, + "objective/train/weighted_lm_loss": 2.886131525039673, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9544672966003418, + "theoretical_loss": 3.350948287915944, + "tokens_seen": 2670592000 + }, + { + "epoch": 0.95, + "learning_rate": 2.3269012485811576e-05, + "loss": 3.0576, + "theoretical_loss": 3.350935342340405, + "tokens_seen": 2670723072 + }, + { + "epoch": 0.95, + "learning_rate": 2.30798335225123e-05, + "loss": 3.0318, + "theoretical_loss": 3.3508318070037504, + "tokens_seen": 2671771648 + }, + { + "epoch": 0.95, + "learning_rate": 2.2890654559213017e-05, + "loss": 3.0846, + "theoretical_loss": 3.350728323665508, + "tokens_seen": 2672820224 + }, + { + "epoch": 0.95, + "objective/train/advantage_avg": 0.48705071210861206, + "objective/train/docs_used": 1504654, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8059308528900146, + "objective/train/original_loss": 2.8059306144714355, + "objective/train/theoretical_loss": 3.3506248922791784, + "objective/train/tokens_used": 2694328800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24300509691238403, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499402284622192, + "objective/train/weighted_lm_loss": 2.945868730545044, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9610604047775269, + "theoretical_loss": 3.3506248922791784, + "tokens_seen": 2673868800 + }, + { + "epoch": 0.96, + "learning_rate": 2.2701475595913734e-05, + "loss": 3.0519, + "theoretical_loss": 3.3506248922791784, + "tokens_seen": 2673868800 + }, + { + "epoch": 0.96, + "learning_rate": 2.2512296632614455e-05, + "loss": 3.0418, + "theoretical_loss": 3.3505215127983226, + "tokens_seen": 2674917376 + }, + { + "epoch": 0.96, + "learning_rate": 2.2323117669315172e-05, + "loss": 3.1014, + "theoretical_loss": 3.3504181851765606, + "tokens_seen": 2675965952 + }, + { + "epoch": 0.96, + "learning_rate": 2.2133938706015892e-05, + "loss": 3.041, + "theoretical_loss": 3.3503149093675724, + "tokens_seen": 2677014528 + }, + { + "epoch": 0.96, + "objective/train/advantage_avg": 0.4849916100502014, + "objective/train/docs_used": 1506560, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9666738510131836, + "objective/train/original_loss": 2.9666740894317627, + "objective/train/theoretical_loss": 3.35030200353223, + "objective/train/tokens_used": 2697605600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24110575020313263, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0497246980667114, + "objective/train/weighted_lm_loss": 3.1141273975372314, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9574628472328186, + "theoretical_loss": 3.35030200353223, + "tokens_seen": 2677145600 + }, + { + "epoch": 0.96, + "learning_rate": 2.1944759742716613e-05, + "loss": 3.0359, + "theoretical_loss": 3.3502116853250974, + "tokens_seen": 2678063104 + }, + { + "epoch": 0.96, + "learning_rate": 2.175558077941733e-05, + "loss": 3.0588, + "theoretical_loss": 3.350108513002934, + "tokens_seen": 2679111680 + }, + { + "epoch": 0.96, + "learning_rate": 2.1566401816118047e-05, + "loss": 3.0685, + "theoretical_loss": 3.35000539235494, + "tokens_seen": 2680160256 + }, + { + "epoch": 0.96, + "objective/train/advantage_avg": 0.4583278298377991, + "objective/train/docs_used": 1507767, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6217093467712402, + "objective/train/original_loss": 2.6217093467712402, + "objective/train/theoretical_loss": 3.349979620261629, + "objective/train/tokens_used": 2700882400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22251355648040771, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0469634532928467, + "objective/train/weighted_lm_loss": 2.7458016872406006, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9549773335456848, + "theoretical_loss": 3.349979620261629, + "tokens_seen": 2680422400 + }, + { + "epoch": 0.96, + "learning_rate": 2.1377222852818768e-05, + "loss": 3.0494, + "theoretical_loss": 3.3499023233350336, + "tokens_seen": 2681208832 + }, + { + "epoch": 0.96, + "learning_rate": 2.1188043889519485e-05, + "loss": 3.0639, + "theoretical_loss": 3.3497993058971898, + "tokens_seen": 2682257408 + }, + { + "epoch": 0.96, + "learning_rate": 2.0998864926220206e-05, + "loss": 3.0573, + "theoretical_loss": 3.349696339995445, + "tokens_seen": 2683305984 + }, + { + "epoch": 0.96, + "objective/train/advantage_avg": 0.485384076833725, + "objective/train/docs_used": 1509839, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9253435134887695, + "objective/train/original_loss": 2.9253437519073486, + "objective/train/theoretical_loss": 3.3496577410595694, + "objective/train/tokens_used": 2704159200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23941144347190857, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049755334854126, + "objective/train/weighted_lm_loss": 3.0702219009399414, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9517756104469299, + "theoretical_loss": 3.3496577410595694, + "tokens_seen": 2683699200 + }, + { + "epoch": 0.96, + "learning_rate": 2.0809685962920923e-05, + "loss": 3.0022, + "theoretical_loss": 3.3495934255838926, + "tokens_seen": 2684354560 + }, + { + "epoch": 0.96, + "learning_rate": 2.0620506999621643e-05, + "loss": 3.0025, + "theoretical_loss": 3.349490562616686, + "tokens_seen": 2685403136 + }, + { + "epoch": 0.96, + "learning_rate": 2.043132803632236e-05, + "loss": 3.0222, + "theoretical_loss": 3.349387751048037, + "tokens_seen": 2686451712 + }, + { + "epoch": 0.96, + "objective/train/advantage_avg": 0.49125921726226807, + "objective/train/docs_used": 1511903, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1318821907043457, + "objective/train/original_loss": 3.1318821907043457, + "objective/train/theoretical_loss": 3.3493363645238787, + "objective/train/tokens_used": 2707436000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24440424144268036, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503684282302856, + "objective/train/weighted_lm_loss": 3.2890326976776123, + "objective/train/weights_max": 1.0512181520462036, + "objective/train/weights_min": 0.9706076979637146, + "theoretical_loss": 3.3493363645238787, + "tokens_seen": 2686976000 + }, + { + "epoch": 0.96, + "learning_rate": 2.0242149073023078e-05, + "loss": 3.0417, + "theoretical_loss": 3.3492849908322158, + "tokens_seen": 2687500288 + }, + { + "epoch": 0.96, + "learning_rate": 2.0052970109723798e-05, + "loss": 3.052, + "theoretical_loss": 3.349182281923551, + "tokens_seen": 2688548864 + }, + { + "epoch": 0.96, + "learning_rate": 1.986379114642452e-05, + "loss": 3.0494, + "theoretical_loss": 3.3490796242764302, + "tokens_seen": 2689597440 + }, + { + "epoch": 0.96, + "objective/train/advantage_avg": 0.4908863604068756, + "objective/train/docs_used": 1514263, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.605135917663574, + "objective/train/original_loss": 2.605135440826416, + "objective/train/theoretical_loss": 3.3490154892579884, + "objective/train/tokens_used": 2710712800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2420625537633896, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503190755844116, + "objective/train/weighted_lm_loss": 2.7363693714141846, + "objective/train/weights_max": 1.0512186288833618, + "objective/train/weights_min": 0.9990963935852051, + "theoretical_loss": 3.3490154892579884, + "tokens_seen": 2690252800 + }, + { + "epoch": 0.96, + "learning_rate": 1.9674612183125236e-05, + "loss": 2.9764, + "theoretical_loss": 3.348977017845299, + "tokens_seen": 2690646016 + }, + { + "epoch": 0.96, + "learning_rate": 1.9485433219825956e-05, + "loss": 2.9394, + "theoretical_loss": 3.3488744625846607, + "tokens_seen": 2691694592 + }, + { + "epoch": 0.96, + "learning_rate": 1.9296254256526674e-05, + "loss": 3.0521, + "theoretical_loss": 3.3487719584490776, + "tokens_seen": 2692743168 + }, + { + "epoch": 0.96, + "objective/train/advantage_avg": 0.487415611743927, + "objective/train/docs_used": 1516259, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.661348581314087, + "objective/train/original_loss": 2.661348819732666, + "objective/train/theoretical_loss": 3.3486951138709067, + "objective/train/tokens_used": 2713989600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24382545053958893, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0499809980392456, + "objective/train/weighted_lm_loss": 2.79396915435791, + "objective/train/weights_max": 1.0512179136276245, + "objective/train/weights_min": 0.956028163433075, + "theoretical_loss": 3.3486951138709067, + "tokens_seen": 2693529600 + }, + { + "epoch": 0.96, + "learning_rate": 1.910707529322739e-05, + "loss": 2.9722, + "theoretical_loss": 3.348669505393169, + "tokens_seen": 2693791744 + }, + { + "epoch": 0.96, + "learning_rate": 1.8917896329928115e-05, + "loss": 3.0601, + "theoretical_loss": 3.348567103371614, + "tokens_seen": 2694840320 + }, + { + "epoch": 0.96, + "learning_rate": 1.8728717366628832e-05, + "loss": 3.0398, + "theoretical_loss": 3.3484647523391473, + "tokens_seen": 2695888896 + }, + { + "epoch": 0.96, + "objective/train/advantage_avg": 0.4843372106552124, + "objective/train/docs_used": 1518290, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.877500534057617, + "objective/train/original_loss": 2.877500534057617, + "objective/train/theoretical_loss": 3.3483752369771853, + "objective/train/tokens_used": 2717266400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24128593504428864, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049660086631775, + "objective/train/weighted_lm_loss": 3.0197174549102783, + "objective/train/weights_max": 1.0512170791625977, + "objective/train/weights_min": 0.9525272250175476, + "theoretical_loss": 3.3483752369771853, + "tokens_seen": 2696806400 + }, + { + "epoch": 0.96, + "learning_rate": 1.853953840332955e-05, + "loss": 3.0041, + "theoretical_loss": 3.3483624522505617, + "tokens_seen": 2696937472 + }, + { + "epoch": 0.96, + "learning_rate": 1.835035944003027e-05, + "loss": 3.0267, + "theoretical_loss": 3.34826020306071, + "tokens_seen": 2697986048 + }, + { + "epoch": 0.96, + "learning_rate": 1.8161180476730987e-05, + "loss": 2.9848, + "theoretical_loss": 3.3481580047244988, + "tokens_seen": 2699034624 + }, + { + "epoch": 0.96, + "objective/train/advantage_avg": 0.4914160370826721, + "objective/train/docs_used": 1520209, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1949706077575684, + "objective/train/original_loss": 3.1949706077575684, + "objective/train/theoretical_loss": 3.3480558571968952, + "objective/train/tokens_used": 2720543200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24515222012996674, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503878593444824, + "objective/train/weighted_lm_loss": 3.3562815189361572, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9568387866020203, + "theoretical_loss": 3.3480558571968952, + "tokens_seen": 2700083200 + }, + { + "epoch": 0.96, + "learning_rate": 1.7972001513431704e-05, + "loss": 3.0669, + "theoretical_loss": 3.3480558571968952, + "tokens_seen": 2700083200 + }, + { + "epoch": 0.96, + "learning_rate": 1.7782822550132428e-05, + "loss": 3.0338, + "theoretical_loss": 3.347953760432923, + "tokens_seen": 2701131776 + }, + { + "epoch": 0.97, + "learning_rate": 1.7593643586833145e-05, + "loss": 2.9885, + "theoretical_loss": 3.3478517143876614, + "tokens_seen": 2702180352 + }, + { + "epoch": 0.97, + "learning_rate": 1.7404464623533862e-05, + "loss": 2.9584, + "theoretical_loss": 3.3477497190162495, + "tokens_seen": 2703228928 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.4775691032409668, + "objective/train/docs_used": 1521508, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8530027866363525, + "objective/train/original_loss": 2.8530025482177734, + "objective/train/theoretical_loss": 3.347736973155596, + "objective/train/tokens_used": 2723820000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23687875270843506, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489609241485596, + "objective/train/weighted_lm_loss": 2.991417407989502, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9562798738479614, + "theoretical_loss": 3.347736973155596, + "tokens_seen": 2703360000 + }, + { + "epoch": 0.97, + "learning_rate": 1.7215285660234583e-05, + "loss": 2.9654, + "theoretical_loss": 3.3476477742738817, + "tokens_seen": 2704277504 + }, + { + "epoch": 0.97, + "learning_rate": 1.70261066969353e-05, + "loss": 2.9792, + "theoretical_loss": 3.3475458801158093, + "tokens_seen": 2705326080 + }, + { + "epoch": 0.97, + "learning_rate": 1.6836927733636017e-05, + "loss": 3.0474, + "theoretical_loss": 3.3474440364973415, + "tokens_seen": 2706374656 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.48271217942237854, + "objective/train/docs_used": 1523597, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.2933883666992188, + "objective/train/original_loss": 2.293388843536377, + "objective/train/theoretical_loss": 3.347418583484306, + "objective/train/tokens_used": 2727096800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23795966804027557, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049480676651001, + "objective/train/weighted_lm_loss": 2.4072484970092773, + "objective/train/weights_max": 1.051218032836914, + "objective/train/weights_min": 0.955068051815033, + "theoretical_loss": 3.347418583484306, + "tokens_seen": 2706636800 + }, + { + "epoch": 0.97, + "learning_rate": 1.664774877033674e-05, + "loss": 2.9809, + "theoretical_loss": 3.347342243373844, + "tokens_seen": 2707423232 + }, + { + "epoch": 0.97, + "learning_rate": 1.6458569807037458e-05, + "loss": 2.9979, + "theoretical_loss": 3.3472405007007384, + "tokens_seen": 2708471808 + }, + { + "epoch": 0.97, + "learning_rate": 1.6269390843738175e-05, + "loss": 3.0455, + "theoretical_loss": 3.347138808433504, + "tokens_seen": 2709520384 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.48901796340942383, + "objective/train/docs_used": 1525609, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7998077869415283, + "objective/train/original_loss": 2.7998077869415283, + "objective/train/theoretical_loss": 3.3471006868194775, + "objective/train/tokens_used": 2730373600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2417827993631363, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050130844116211, + "objective/train/weighted_lm_loss": 2.9398789405822754, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.960767388343811, + "theoretical_loss": 3.3471006868194775, + "tokens_seen": 2709913600 + }, + { + "epoch": 0.97, + "learning_rate": 1.6080211880438896e-05, + "loss": 2.9932, + "theoretical_loss": 3.3470371665276755, + "tokens_seen": 2710568960 + }, + { + "epoch": 0.97, + "learning_rate": 1.5891032917139613e-05, + "loss": 3.0507, + "theoretical_loss": 3.3469355749388447, + "tokens_seen": 2711617536 + }, + { + "epoch": 0.97, + "learning_rate": 1.570185395384033e-05, + "loss": 3.083, + "theoretical_loss": 3.3468340336226596, + "tokens_seen": 2712666112 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.46617740392684937, + "objective/train/docs_used": 1527506, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0006046295166016, + "objective/train/original_loss": 3.0006043910980225, + "objective/train/theoretical_loss": 3.3467832818029644, + "objective/train/tokens_used": 2733650400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22822509706020355, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.047777771949768, + "objective/train/weighted_lm_loss": 3.147068500518799, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.9607921838760376, + "theoretical_loss": 3.3467832818029644, + "tokens_seen": 2713190400 + }, + { + "epoch": 0.97, + "learning_rate": 1.5512674990541054e-05, + "loss": 3.0005, + "theoretical_loss": 3.3467325425348244, + "tokens_seen": 2713714688 + }, + { + "epoch": 0.97, + "learning_rate": 1.532349602724177e-05, + "loss": 3.0609, + "theoretical_loss": 3.3466311016310994, + "tokens_seen": 2714763264 + }, + { + "epoch": 0.97, + "learning_rate": 1.513431706394249e-05, + "loss": 3.018, + "theoretical_loss": 3.3465297108673013, + "tokens_seen": 2715811840 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.4781047999858856, + "objective/train/docs_used": 1529013, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.908228874206543, + "objective/train/original_loss": 2.908228874206543, + "objective/train/theoretical_loss": 3.346466367081999, + "objective/train/tokens_used": 2736927200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23485645651817322, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0490041971206665, + "objective/train/weighted_lm_loss": 3.0513052940368652, + "objective/train/weights_max": 1.0512176752090454, + "objective/train/weights_min": 0.959873616695404, + "theoretical_loss": 3.346466367081999, + "tokens_seen": 2716467200 + }, + { + "epoch": 0.97, + "learning_rate": 1.4945138100643209e-05, + "loss": 2.99, + "theoretical_loss": 3.346428370199302, + "tokens_seen": 2716860416 + }, + { + "epoch": 0.97, + "learning_rate": 1.4755959137343928e-05, + "loss": 3.1079, + "theoretical_loss": 3.3463270795830296, + "tokens_seen": 2717908992 + }, + { + "epoch": 0.97, + "learning_rate": 1.4566780174044647e-05, + "loss": 3.1435, + "theoretical_loss": 3.3462258389744677, + "tokens_seen": 2718957568 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.4586866497993469, + "objective/train/docs_used": 1529588, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8062684535980225, + "objective/train/original_loss": 2.8062686920166016, + "objective/train/theoretical_loss": 3.34614994130916, + "objective/train/tokens_used": 2740204000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22812047600746155, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0470279455184937, + "objective/train/weighted_lm_loss": 2.941352128982544, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.9811699390411377, + "theoretical_loss": 3.34614994130916, + "tokens_seen": 2719744000 + }, + { + "epoch": 0.97, + "learning_rate": 1.4377601210745365e-05, + "loss": 3.1722, + "theoretical_loss": 3.3461246483296563, + "tokens_seen": 2720006144 + }, + { + "epoch": 0.97, + "learning_rate": 1.4188422247446084e-05, + "loss": 3.183, + "theoretical_loss": 3.346023507604691, + "tokens_seen": 2721054720 + }, + { + "epoch": 0.97, + "learning_rate": 1.3999243284146803e-05, + "loss": 3.0404, + "theoretical_loss": 3.3459224167557213, + "tokens_seen": 2722103296 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.4828481078147888, + "objective/train/docs_used": 1531474, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.01423716545105, + "objective/train/original_loss": 3.014237403869629, + "objective/train/theoretical_loss": 3.345834003142347, + "objective/train/tokens_used": 2743480800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2370041161775589, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0494893789291382, + "objective/train/weighted_lm_loss": 3.1627633571624756, + "objective/train/weights_max": 1.05121648311615, + "objective/train/weights_min": 0.9518007040023804, + "theoretical_loss": 3.345834003142347, + "tokens_seen": 2723020800 + }, + { + "epoch": 0.97, + "learning_rate": 1.3810064320847522e-05, + "loss": 3.0889, + "theoretical_loss": 3.3458213757389537, + "tokens_seen": 2723151872 + }, + { + "epoch": 0.97, + "learning_rate": 1.362088535754824e-05, + "loss": 3.071, + "theoretical_loss": 3.34572038451065, + "tokens_seen": 2724200448 + }, + { + "epoch": 0.97, + "learning_rate": 1.343170639424896e-05, + "loss": 3.1018, + "theoretical_loss": 3.3456194430271258, + "tokens_seen": 2725249024 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.48170891404151917, + "objective/train/docs_used": 1533341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8722846508026123, + "objective/train/original_loss": 2.872284412384033, + "objective/train/theoretical_loss": 3.3455185512447527, + "objective/train/tokens_used": 2746757600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23601827025413513, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049370527267456, + "objective/train/weighted_lm_loss": 3.012676477432251, + "objective/train/weights_max": 1.0512162446975708, + "objective/train/weights_min": 0.9820363521575928, + "theoretical_loss": 3.3455185512447527, + "tokens_seen": 2726297600 + }, + { + "epoch": 0.97, + "learning_rate": 1.324252743094968e-05, + "loss": 3.0983, + "theoretical_loss": 3.3455185512447527, + "tokens_seen": 2726297600 + }, + { + "epoch": 0.97, + "learning_rate": 1.3053348467650397e-05, + "loss": 3.1182, + "theoretical_loss": 3.3454177091199586, + "tokens_seen": 2727346176 + }, + { + "epoch": 0.97, + "learning_rate": 1.2864169504351116e-05, + "loss": 3.0496, + "theoretical_loss": 3.3453169166092236, + "tokens_seen": 2728394752 + }, + { + "epoch": 0.97, + "learning_rate": 1.2674990541051837e-05, + "loss": 3.0598, + "theoretical_loss": 3.345216173669085, + "tokens_seen": 2729443328 + }, + { + "epoch": 0.97, + "objective/train/advantage_avg": 0.48440757393836975, + "objective/train/docs_used": 1535248, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1303958892822266, + "objective/train/original_loss": 3.1303963661193848, + "objective/train/theoretical_loss": 3.3452035842848376, + "objective/train/tokens_used": 2750034400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2397458553314209, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0496593713760376, + "objective/train/weighted_lm_loss": 3.2855958938598633, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.954217791557312, + "theoretical_loss": 3.3452035842848376, + "tokens_seen": 2729574400 + }, + { + "epoch": 0.98, + "learning_rate": 1.2485811577752554e-05, + "loss": 3.0877, + "theoretical_loss": 3.345115480256134, + "tokens_seen": 2730491904 + }, + { + "epoch": 0.98, + "learning_rate": 1.2296632614453273e-05, + "loss": 3.1178, + "theoretical_loss": 3.3450148363270156, + "tokens_seen": 2731540480 + }, + { + "epoch": 0.98, + "learning_rate": 1.2107453651153993e-05, + "loss": 3.1079, + "theoretical_loss": 3.3449142418384312, + "tokens_seen": 2732589056 + }, + { + "epoch": 0.98, + "objective/train/advantage_avg": 0.4586898982524872, + "objective/train/docs_used": 1536975, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1073994636535645, + "objective/train/original_loss": 3.1073994636535645, + "objective/train/theoretical_loss": 3.3448891009362995, + "objective/train/tokens_used": 2753311200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.22674831748008728, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0470212697982788, + "objective/train/weighted_lm_loss": 3.251424789428711, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9534921050071716, + "theoretical_loss": 3.3448891009362995, + "tokens_seen": 2732851200 + }, + { + "epoch": 0.98, + "learning_rate": 1.191827468785471e-05, + "loss": 3.0302, + "theoretical_loss": 3.344813696747135, + "tokens_seen": 2733637632 + }, + { + "epoch": 0.98, + "learning_rate": 1.172909572455543e-05, + "loss": 3.1274, + "theoretical_loss": 3.3447132010099363, + "tokens_seen": 2734686208 + }, + { + "epoch": 0.98, + "learning_rate": 1.153991676125615e-05, + "loss": 3.1093, + "theoretical_loss": 3.344612754583699, + "tokens_seen": 2735734784 + }, + { + "epoch": 0.98, + "objective/train/advantage_avg": 0.4917672872543335, + "objective/train/docs_used": 1538829, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1836228370666504, + "objective/train/original_loss": 3.1836228370666504, + "objective/train/theoretical_loss": 3.344575099878048, + "objective/train/tokens_used": 2756588000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24380403757095337, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050416111946106, + "objective/train/weighted_lm_loss": 3.343857765197754, + "objective/train/weights_max": 1.0512166023254395, + "objective/train/weights_min": 0.969031035900116, + "theoretical_loss": 3.344575099878048, + "tokens_seen": 2736128000 + }, + { + "epoch": 0.98, + "learning_rate": 1.1350737797956867e-05, + "loss": 3.1061, + "theoretical_loss": 3.3445123574253417, + "tokens_seen": 2736783360 + }, + { + "epoch": 0.98, + "learning_rate": 1.1161558834657586e-05, + "loss": 3.1514, + "theoretical_loss": 3.3444120094918346, + "tokens_seen": 2737831936 + }, + { + "epoch": 0.98, + "learning_rate": 1.0972379871358306e-05, + "loss": 3.0688, + "theoretical_loss": 3.344311710740205, + "tokens_seen": 2738880512 + }, + { + "epoch": 0.98, + "objective/train/advantage_avg": 0.490246057510376, + "objective/train/docs_used": 1540914, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9021008014678955, + "objective/train/original_loss": 2.9021010398864746, + "objective/train/theoretical_loss": 3.3442615797941793, + "objective/train/tokens_used": 2759864800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24154502153396606, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502524375915527, + "objective/train/weighted_lm_loss": 3.0474276542663574, + "objective/train/weights_max": 1.051216959953308, + "objective/train/weights_min": 0.9797579646110535, + "theoretical_loss": 3.3442615797941793, + "tokens_seen": 2739404800 + }, + { + "epoch": 0.98, + "learning_rate": 1.0783200908059024e-05, + "loss": 3.0935, + "theoretical_loss": 3.344211461127532, + "tokens_seen": 2739929088 + }, + { + "epoch": 0.98, + "learning_rate": 1.0594021944759742e-05, + "loss": 3.1297, + "theoretical_loss": 3.3441112606109504, + "tokens_seen": 2740977664 + }, + { + "epoch": 0.98, + "learning_rate": 1.0404842981460461e-05, + "loss": 3.1092, + "theoretical_loss": 3.344011109147647, + "tokens_seen": 2742026240 + }, + { + "epoch": 0.98, + "objective/train/advantage_avg": 0.47691646218299866, + "objective/train/docs_used": 1542635, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9913744926452637, + "objective/train/original_loss": 2.9913742542266846, + "objective/train/theoretical_loss": 3.3439485393739488, + "objective/train/tokens_used": 2763141600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2393837571144104, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0489083528518677, + "objective/train/weighted_lm_loss": 3.13615083694458, + "objective/train/weights_max": 1.0512189865112305, + "objective/train/weights_min": 0.9514455199241638, + "theoretical_loss": 3.3439485393739488, + "tokens_seen": 2742681600 + }, + { + "epoch": 0.98, + "learning_rate": 1.021566401816118e-05, + "loss": 3.1106, + "theoretical_loss": 3.343911006694863, + "tokens_seen": 2743074816 + }, + { + "epoch": 0.98, + "learning_rate": 1.0026485054861899e-05, + "loss": 3.0931, + "theoretical_loss": 3.3438109532098936, + "tokens_seen": 2744123392 + }, + { + "epoch": 0.98, + "learning_rate": 9.837306091562618e-06, + "loss": 3.0949, + "theoretical_loss": 3.343710948650087, + "tokens_seen": 2745171968 + }, + { + "epoch": 0.98, + "objective/train/advantage_avg": 0.49094340205192566, + "objective/train/docs_used": 1544447, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.071685791015625, + "objective/train/original_loss": 3.071685791015625, + "objective/train/theoretical_loss": 3.343635977311743, + "objective/train/tokens_used": 2766418400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24382232129573822, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0503337383270264, + "objective/train/weighted_lm_loss": 3.2261123657226562, + "objective/train/weights_max": 1.0512171983718872, + "objective/train/weights_min": 0.9554123282432556, + "theoretical_loss": 3.343635977311743, + "tokens_seen": 2745958400 + }, + { + "epoch": 0.98, + "learning_rate": 9.648127128263337e-06, + "loss": 3.1831, + "theoretical_loss": 3.3436109929728453, + "tokens_seen": 2746220544 + }, + { + "epoch": 0.98, + "learning_rate": 9.458948164964057e-06, + "loss": 3.0834, + "theoretical_loss": 3.3435110861356234, + "tokens_seen": 2747269120 + }, + { + "epoch": 0.98, + "learning_rate": 9.269769201664774e-06, + "loss": 3.1124, + "theoretical_loss": 3.3434112280959294, + "tokens_seen": 2748317696 + }, + { + "epoch": 0.98, + "objective/train/advantage_avg": 0.47922295331954956, + "objective/train/docs_used": 1546277, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0440077781677246, + "objective/train/original_loss": 3.0440077781677246, + "objective/train/theoretical_loss": 3.343323892307056, + "objective/train/tokens_used": 2769695200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23427808284759521, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0491130352020264, + "objective/train/weighted_lm_loss": 3.1924755573272705, + "objective/train/weights_max": 1.0512192249298096, + "objective/train/weights_min": 0.9526734948158264, + "theoretical_loss": 3.343323892307056, + "tokens_seen": 2749235200 + }, + { + "epoch": 0.98, + "learning_rate": 9.080590238365493e-06, + "loss": 3.091, + "theoretical_loss": 3.343311418811325, + "tokens_seen": 2749366272 + }, + { + "epoch": 0.98, + "learning_rate": 8.891411275066214e-06, + "loss": 3.1046, + "theoretical_loss": 3.3432116582394253, + "tokens_seen": 2750414848 + }, + { + "epoch": 0.98, + "learning_rate": 8.702232311766931e-06, + "loss": 3.1246, + "theoretical_loss": 3.3431119463378973, + "tokens_seen": 2751463424 + }, + { + "epoch": 0.98, + "objective/train/advantage_avg": 0.4734799861907959, + "objective/train/docs_used": 1547942, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2146964073181152, + "objective/train/original_loss": 3.2146964073181152, + "objective/train/theoretical_loss": 3.343012283064462, + "objective/train/tokens_used": 2772972000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23234401643276215, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0485291481018066, + "objective/train/weighted_lm_loss": 3.3720321655273438, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.963914155960083, + "theoretical_loss": 3.343012283064462, + "tokens_seen": 2752512000 + }, + { + "epoch": 0.98, + "learning_rate": 8.51305334846765e-06, + "loss": 3.0917, + "theoretical_loss": 3.343012283064462, + "tokens_seen": 2752512000 + }, + { + "epoch": 0.98, + "learning_rate": 8.32387438516837e-06, + "loss": 3.0486, + "theoretical_loss": 3.342912668376892, + "tokens_seen": 2753560576 + }, + { + "epoch": 0.98, + "learning_rate": 8.134695421869088e-06, + "loss": 3.0753, + "theoretical_loss": 3.342813102233014, + "tokens_seen": 2754609152 + }, + { + "epoch": 0.98, + "learning_rate": 7.945516458569806e-06, + "loss": 3.0843, + "theoretical_loss": 3.3427135845907063, + "tokens_seen": 2755657728 + }, + { + "epoch": 0.98, + "objective/train/advantage_avg": 0.4692471921443939, + "objective/train/docs_used": 1550559, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7862179279327393, + "objective/train/original_loss": 2.78621768951416, + "objective/train/theoretical_loss": 3.342701148293589, + "objective/train/tokens_used": 2776248800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23171769082546234, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0481022596359253, + "objective/train/weighted_lm_loss": 2.9225549697875977, + "objective/train/weights_max": 1.0512168407440186, + "objective/train/weights_min": 0.9525762796401978, + "theoretical_loss": 3.342701148293589, + "tokens_seen": 2755788800 + }, + { + "epoch": 0.98, + "learning_rate": 7.756337495270527e-06, + "loss": 3.1165, + "theoretical_loss": 3.3426141154079008, + "tokens_seen": 2756706304 + }, + { + "epoch": 0.99, + "learning_rate": 7.567158531971245e-06, + "loss": 2.9891, + "theoretical_loss": 3.3425146946425803, + "tokens_seen": 2757754880 + }, + { + "epoch": 0.99, + "learning_rate": 7.377979568671964e-06, + "loss": 3.0308, + "theoretical_loss": 3.3424153222527817, + "tokens_seen": 2758803456 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.48670539259910583, + "objective/train/docs_used": 1552546, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9341201782226562, + "objective/train/original_loss": 2.934119701385498, + "objective/train/theoretical_loss": 3.3423904867090948, + "objective/train/tokens_used": 2779525600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24417608976364136, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049911618232727, + "objective/train/weighted_lm_loss": 3.079969882965088, + "objective/train/weights_max": 1.051216721534729, + "objective/train/weights_min": 0.9527941942214966, + "theoretical_loss": 3.3423904867090948, + "tokens_seen": 2759065600 + }, + { + "epoch": 0.99, + "learning_rate": 7.188800605372683e-06, + "loss": 3.0546, + "theoretical_loss": 3.342315998196593, + "tokens_seen": 2759852032 + }, + { + "epoch": 0.99, + "learning_rate": 6.9996216420734016e-06, + "loss": 3.0525, + "theoretical_loss": 3.342216722432155, + "tokens_seen": 2760900608 + }, + { + "epoch": 0.99, + "learning_rate": 6.81044267877412e-06, + "loss": 3.0923, + "theoretical_loss": 3.3421174949176606, + "tokens_seen": 2761949184 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.48612168431282043, + "objective/train/docs_used": 1554602, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1402394771575928, + "objective/train/original_loss": 3.140239715576172, + "objective/train/theoretical_loss": 3.3420802970306394, + "objective/train/tokens_used": 2782802400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23997044563293457, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0498319864273071, + "objective/train/weighted_lm_loss": 3.2957353591918945, + "objective/train/weights_max": 1.0512193441390991, + "objective/train/weights_min": 0.9562333226203918, + "theoretical_loss": 3.3420802970306394, + "tokens_seen": 2762342400 + }, + { + "epoch": 0.99, + "learning_rate": 6.62126371547484e-06, + "loss": 3.0371, + "theoretical_loss": 3.3420183156113543, + "tokens_seen": 2762997760 + }, + { + "epoch": 0.99, + "learning_rate": 6.432084752175558e-06, + "loss": 3.0072, + "theoretical_loss": 3.3419191844715326, + "tokens_seen": 2764046336 + }, + { + "epoch": 0.99, + "learning_rate": 6.242905788876277e-06, + "loss": 2.9878, + "theoretical_loss": 3.341820101456545, + "tokens_seen": 2765094912 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.48010215163230896, + "objective/train/docs_used": 1555428, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.860677480697632, + "objective/train/original_loss": 2.860677480697632, + "objective/train/theoretical_loss": 3.341770577982862, + "objective/train/tokens_used": 2786079200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23556600511074066, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049207329750061, + "objective/train/weighted_lm_loss": 3.00154709815979, + "objective/train/weights_max": 1.0512183904647827, + "objective/train/weights_min": 0.9520919322967529, + "theoretical_loss": 3.341770577982862, + "tokens_seen": 2765619200 + }, + { + "epoch": 0.99, + "learning_rate": 6.053726825576997e-06, + "loss": 3.1445, + "theoretical_loss": 3.3417210665247916, + "tokens_seen": 2766143488 + }, + { + "epoch": 0.99, + "learning_rate": 5.864547862277715e-06, + "loss": 3.1284, + "theoretical_loss": 3.3416220796347242, + "tokens_seen": 2767192064 + }, + { + "epoch": 0.99, + "learning_rate": 5.6753688989784335e-06, + "loss": 3.041, + "theoretical_loss": 3.341523140744847, + "tokens_seen": 2768240640 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.48793041706085205, + "objective/train/docs_used": 1557592, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1938629150390625, + "objective/train/original_loss": 3.1938624382019043, + "objective/train/theoretical_loss": 3.341461328295353, + "objective/train/tokens_used": 2789356000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24034656584262848, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050014853477478, + "objective/train/weighted_lm_loss": 3.354142427444458, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9659320116043091, + "theoretical_loss": 3.341461328295353, + "tokens_seen": 2768896000 + }, + { + "epoch": 0.99, + "learning_rate": 5.486189935679153e-06, + "loss": 3.1212, + "theoretical_loss": 3.3414242498137154, + "tokens_seen": 2769289216 + }, + { + "epoch": 0.99, + "learning_rate": 5.297010972379871e-06, + "loss": 3.0925, + "theoretical_loss": 3.341325406799936, + "tokens_seen": 2770337792 + }, + { + "epoch": 0.99, + "learning_rate": 5.10783200908059e-06, + "loss": 3.1018, + "theoretical_loss": 3.341226611662167, + "tokens_seen": 2771386368 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.4908839166164398, + "objective/train/docs_used": 1559642, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9026548862457275, + "objective/train/original_loss": 2.9026551246643066, + "objective/train/theoretical_loss": 3.341152546702631, + "objective/train/tokens_used": 2792632800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24205824732780457, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050318717956543, + "objective/train/weighted_lm_loss": 3.049380302429199, + "objective/train/weights_max": 1.0512174367904663, + "objective/train/weights_min": 0.9522111415863037, + "theoretical_loss": 3.341152546702631, + "tokens_seen": 2772172800 + }, + { + "epoch": 0.99, + "learning_rate": 4.918653045781309e-06, + "loss": 3.0673, + "theoretical_loss": 3.3411278643591173, + "tokens_seen": 2772434944 + }, + { + "epoch": 0.99, + "learning_rate": 4.729474082482029e-06, + "loss": 3.1004, + "theoretical_loss": 3.341029164849549, + "tokens_seen": 2773483520 + }, + { + "epoch": 0.99, + "learning_rate": 4.540295119182747e-06, + "loss": 3.2102, + "theoretical_loss": 3.3409305130922724, + "tokens_seen": 2774532096 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.4898316562175751, + "objective/train/docs_used": 1561371, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8627278804779053, + "objective/train/original_loss": 2.862727642059326, + "objective/train/theoretical_loss": 3.3408442319441174, + "objective/train/tokens_used": 2795909600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24388957023620605, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0502229928970337, + "objective/train/weighted_lm_loss": 3.005845069885254, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9519106149673462, + "theoretical_loss": 3.3408442319441174, + "tokens_seen": 2775449600 + }, + { + "epoch": 0.99, + "learning_rate": 4.3511161558834655e-06, + "loss": 3.1312, + "theoretical_loss": 3.3408319090461513, + "tokens_seen": 2775580672 + }, + { + "epoch": 0.99, + "learning_rate": 4.161937192584185e-06, + "loss": 3.0719, + "theoretical_loss": 3.340733352670099, + "tokens_seen": 2776629248 + }, + { + "epoch": 0.99, + "learning_rate": 3.972758229284903e-06, + "loss": 3.1596, + "theoretical_loss": 3.3406348439230804, + "tokens_seen": 2777677824 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.48310354351997375, + "objective/train/docs_used": 1563208, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1156625747680664, + "objective/train/original_loss": 3.1156623363494873, + "objective/train/theoretical_loss": 3.3405363827641112, + "objective/train/tokens_used": 2799186400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.2416696399450302, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495388507843018, + "objective/train/weighted_lm_loss": 3.269275188446045, + "objective/train/weights_max": 1.0512175559997559, + "objective/train/weights_min": 0.951533854007721, + "theoretical_loss": 3.3405363827641112, + "tokens_seen": 2778726400 + }, + { + "epoch": 0.99, + "learning_rate": 3.7835792659856225e-06, + "loss": 3.1249, + "theoretical_loss": 3.3405363827641112, + "tokens_seen": 2778726400 + }, + { + "epoch": 0.99, + "learning_rate": 3.5944003026863414e-06, + "loss": 3.1159, + "theoretical_loss": 3.340437969152257, + "tokens_seen": 2779774976 + }, + { + "epoch": 0.99, + "learning_rate": 3.40522133938706e-06, + "loss": 3.1107, + "theoretical_loss": 3.340339603046636, + "tokens_seen": 2780823552 + }, + { + "epoch": 0.99, + "learning_rate": 3.216042376087779e-06, + "loss": 3.1666, + "theoretical_loss": 3.3402412844064138, + "tokens_seen": 2781872128 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.4891952872276306, + "objective/train/docs_used": 1564970, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.351323127746582, + "objective/train/original_loss": 3.351323127746582, + "objective/train/theoretical_loss": 3.3402289979117654, + "objective/train/tokens_used": 2802463200, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24344053864479065, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.050157070159912, + "objective/train/weighted_lm_loss": 3.5188140869140625, + "objective/train/weights_max": 1.05121910572052, + "objective/train/weights_min": 0.9516726732254028, + "theoretical_loss": 3.3402289979117654, + "tokens_seen": 2782003200 + }, + { + "epoch": 0.99, + "learning_rate": 3.0268634127884983e-06, + "loss": 3.1592, + "theoretical_loss": 3.340143013190809, + "tokens_seen": 2782920704 + }, + { + "epoch": 0.99, + "learning_rate": 2.8376844494892168e-06, + "loss": 3.1745, + "theoretical_loss": 3.3400447893590903, + "tokens_seen": 2783969280 + }, + { + "epoch": 0.99, + "learning_rate": 2.6485054861899356e-06, + "loss": 3.1288, + "theoretical_loss": 3.339946612870576, + "tokens_seen": 2785017856 + }, + { + "epoch": 0.99, + "objective/train/advantage_avg": 0.4746108055114746, + "objective/train/docs_used": 1566749, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9263644218444824, + "objective/train/original_loss": 2.9263644218444824, + "objective/train/theoretical_loss": 3.33992207614106, + "objective/train/tokens_used": 2805740000, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23878102004528046, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.048674464225769, + "objective/train/weighted_lm_loss": 3.06941556930542, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9526550769805908, + "theoretical_loss": 3.33992207614106, + "tokens_seen": 2785280000 + }, + { + "epoch": 1.0, + "learning_rate": 2.4593265228906545e-06, + "loss": 3.12, + "theoretical_loss": 3.3398484836846345, + "tokens_seen": 2786066432 + }, + { + "epoch": 1.0, + "learning_rate": 2.2701475595913733e-06, + "loss": 3.0822, + "theoretical_loss": 3.3397504017606847, + "tokens_seen": 2787115008 + }, + { + "epoch": 1.0, + "learning_rate": 2.0809685962920926e-06, + "loss": 3.1112, + "theoretical_loss": 3.3396523670581963, + "tokens_seen": 2788163584 + }, + { + "epoch": 1.0, + "objective/train/advantage_avg": 0.4833011329174042, + "objective/train/docs_used": 1568853, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1042532920837402, + "objective/train/original_loss": 3.104253053665161, + "objective/train/theoretical_loss": 3.339615616210782, + "objective/train/tokens_used": 2809016800, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24029603600502014, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0495517253875732, + "objective/train/weighted_lm_loss": 3.2570862770080566, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9518541097640991, + "theoretical_loss": 3.339615616210782, + "tokens_seen": 2788556800 + }, + { + "epoch": 1.0, + "learning_rate": 1.8917896329928113e-06, + "loss": 3.08, + "theoretical_loss": 3.339554379536688, + "tokens_seen": 2789212160 + }, + { + "epoch": 1.0, + "learning_rate": 1.70261066969353e-06, + "loss": 3.0355, + "theoretical_loss": 3.339456439155728, + "tokens_seen": 2790260736 + }, + { + "epoch": 1.0, + "learning_rate": 1.5134317063942492e-06, + "loss": 3.0258, + "theoretical_loss": 3.339358545874936, + "tokens_seen": 2791309312 + }, + { + "epoch": 1.0, + "objective/train/advantage_avg": 0.48446664214134216, + "objective/train/docs_used": 1571029, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.068040132522583, + "objective/train/original_loss": 3.068040132522583, + "objective/train/theoretical_loss": 3.3393096168844973, + "objective/train/tokens_used": 2812293600, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23969173431396484, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049665093421936, + "objective/train/weighted_lm_loss": 3.2199342250823975, + "objective/train/weights_max": 1.0512182712554932, + "objective/train/weights_min": 0.9749250411987305, + "theoretical_loss": 3.3393096168844973, + "tokens_seen": 2791833600 + }, + { + "epoch": 1.0, + "learning_rate": 1.3242527430949678e-06, + "loss": 3.0623, + "theoretical_loss": 3.3392606996539804, + "tokens_seen": 2792357888 + }, + { + "epoch": 1.0, + "learning_rate": 1.1350737797956867e-06, + "loss": 3.0497, + "theoretical_loss": 3.3391629004525782, + "tokens_seen": 2793406464 + }, + { + "epoch": 1.0, + "learning_rate": 9.458948164964056e-07, + "loss": 3.0735, + "theoretical_loss": 3.3390651482304983, + "tokens_seen": 2794455040 + }, + { + "epoch": 1.0, + "objective/train/advantage_avg": 0.4827611446380615, + "objective/train/docs_used": 1572728, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.77453351020813, + "objective/train/original_loss": 2.774533748626709, + "objective/train/theoretical_loss": 3.3390040769305287, + "objective/train/tokens_used": 2815570400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.23958729207515717, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.049493670463562, + "objective/train/weighted_lm_loss": 2.9126195907592773, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.95488440990448, + "theoretical_loss": 3.3390040769305287, + "tokens_seen": 2795110400 + }, + { + "epoch": 1.0, + "learning_rate": 7.567158531971246e-07, + "loss": 3.0685, + "theoretical_loss": 3.3389674429475575, + "tokens_seen": 2795503616 + }, + { + "epoch": 1.0, + "learning_rate": 5.675368898978433e-07, + "loss": 3.0212, + "theoretical_loss": 3.3388697845636224, + "tokens_seen": 2796552192 + }, + { + "epoch": 1.0, + "learning_rate": 3.783579265985623e-07, + "loss": 3.0448, + "theoretical_loss": 3.338772173038609, + "tokens_seen": 2797600768 + } + ], + "max_steps": 2670, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.427729276882387e+18, + "trial_name": null, + "trial_params": null +}