diff --git "a/checkpoint-1673/trainer_state.json" "b/checkpoint-1673/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1673/trainer_state.json" @@ -0,0 +1,36427 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5316174134095965, + "global_step": 1673, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.125e-05, + "loss": 5.7972, + "theoretical_loss": 10.87642657795271, + "tokens_seen": 1048576 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.4939412772655487, + "objective/train/docs_used": 13112, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 10.812536239624023, + "objective/train/original_loss": 10.812536239624023, + "objective/train/theoretical_loss": 9.881761617888868, + "objective/train/tokens_used": 22098400, + "objective/train/value_avg": -0.5, + "objective/train/value_loss": 0.24582301080226898, + "objective/train/value_max": -0.5, + "objective/train/value_min": -0.5, + "objective/train/value_reward_corr": NaN, + "objective/train/value_std": 0.0, + "objective/train/weight_avg": 1.0506439208984375, + "objective/train/weighted_lm_loss": 11.360099792480469, + "objective/train/weights_max": 1.0512173175811768, + "objective/train/weights_min": 0.9741270542144775, + "theoretical_loss": 9.881761617888868, + "tokens_seen": 1638400 + }, + { + "epoch": 0.0, + "learning_rate": 6.25e-05, + "loss": 5.8021, + "theoretical_loss": 9.382678282406216, + "tokens_seen": 2097152 + }, + { + "epoch": 0.0, + "learning_rate": 9.375e-05, + "loss": 5.2419, + "theoretical_loss": 8.634232609231233, + "tokens_seen": 3145728 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.4874303340911865, + "objective/train/docs_used": 13755, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 8.925251960754395, + "objective/train/original_loss": 8.925251960754395, + "objective/train/theoretical_loss": 8.563481156106828, + "objective/train/tokens_used": 23736800, + "objective/train/value_avg": -0.494873046875, + "objective/train/value_loss": 0.2396508902311325, + "objective/train/value_max": -0.494384765625, + "objective/train/value_min": -0.495849609375, + "objective/train/value_reward_corr": -0.0056927412690485155, + "objective/train/value_std": 0.00018894672393798828, + "objective/train/weight_avg": 1.0499612092971802, + "objective/train/weighted_lm_loss": 9.37061882019043, + "objective/train/weights_max": 1.0507713556289673, + "objective/train/weights_min": 0.9680130481719971, + "theoretical_loss": 8.563481156106828, + "tokens_seen": 3276800 + }, + { + "epoch": 0.0, + "learning_rate": 0.000125, + "loss": 5.0137, + "theoretical_loss": 8.152440604135377, + "tokens_seen": 4194304 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.4634787440299988, + "objective/train/docs_used": 15099, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 9.516928672790527, + "objective/train/original_loss": 9.516928672790527, + "objective/train/theoretical_loss": 7.902954003979827, + "objective/train/tokens_used": 25375200, + "objective/train/value_avg": -0.490966796875, + "objective/train/value_loss": 0.22520850598812103, + "objective/train/value_max": -0.4873046875, + "objective/train/value_min": -0.492919921875, + "objective/train/value_reward_corr": -0.45809063708561365, + "objective/train/value_std": 0.0011243820190429688, + "objective/train/weight_avg": 1.0474921464920044, + "objective/train/weighted_lm_loss": 9.976279258728027, + "objective/train/weights_max": 1.0504635572433472, + "objective/train/weights_min": 0.9506087303161621, + "theoretical_loss": 7.902954003979827, + "tokens_seen": 4915200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015625, + "loss": 5.1554, + "theoretical_loss": 7.804563746449924, + "tokens_seen": 5242880 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001875, + "loss": 4.8356, + "theoretical_loss": 7.536027470795679, + "tokens_seen": 6291456 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.4376768469810486, + "objective/train/docs_used": 15777, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 9.594051361083984, + "objective/train/original_loss": 9.5940523147583, + "objective/train/theoretical_loss": 7.477757209543791, + "objective/train/tokens_used": 27013600, + "objective/train/value_avg": -0.458251953125, + "objective/train/value_loss": 0.1985439658164978, + "objective/train/value_max": -0.45654296875, + "objective/train/value_min": -0.46435546875, + "objective/train/value_reward_corr": -0.0708362458838252, + "objective/train/value_std": 0.0016679763793945312, + "objective/train/weight_avg": 1.044775366783142, + "objective/train/weighted_lm_loss": 10.02287769317627, + "objective/train/weights_max": 1.0473999977111816, + "objective/train/weights_min": 0.9476447701454163, + "theoretical_loss": 7.477757209543791, + "tokens_seen": 6553600 + }, + { + "epoch": 0.0, + "learning_rate": 0.00021875, + "loss": 4.8462, + "theoretical_loss": 7.319437165569436, + "tokens_seen": 7340032 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.4267476797103882, + "objective/train/docs_used": 17166, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 8.85726261138916, + "objective/train/original_loss": 8.857263565063477, + "objective/train/theoretical_loss": 7.170744799246725, + "objective/train/tokens_used": 28652000, + "objective/train/value_avg": -0.435791015625, + "objective/train/value_loss": 0.18594633042812347, + "objective/train/value_max": -0.433837890625, + "objective/train/value_min": -0.4404296875, + "objective/train/value_reward_corr": -0.016674371910130683, + "objective/train/value_std": 0.0011358261108398438, + "objective/train/weight_avg": 1.0436179637908936, + "objective/train/weighted_lm_loss": 9.24317455291748, + "objective/train/weights_max": 1.0446817874908447, + "objective/train/weights_min": 0.9507918357849121, + "theoretical_loss": 7.170744799246725, + "tokens_seen": 8192000 + }, + { + "epoch": 0.0, + "learning_rate": 0.00025, + "loss": 4.7145, + "theoretical_loss": 7.139227903207399, + "tokens_seen": 8388608 + }, + { + "epoch": 0.0, + "learning_rate": 0.00028125000000000003, + "loss": 4.6338, + "theoretical_loss": 6.985769514638539, + "tokens_seen": 9437184 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.35531085729599, + "objective/train/docs_used": 17861, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 8.609107971191406, + "objective/train/original_loss": 8.609107971191406, + "objective/train/theoretical_loss": 6.9337529803906595, + "objective/train/tokens_used": 30290400, + "objective/train/value_avg": -0.376220703125, + "objective/train/value_loss": 0.1328696757555008, + "objective/train/value_max": -0.37158203125, + "objective/train/value_min": -0.386474609375, + "objective/train/value_reward_corr": -0.07858682833981219, + "objective/train/value_std": 0.00318145751953125, + "objective/train/weight_avg": 1.0362035036087036, + "objective/train/weighted_lm_loss": 8.918526649475098, + "objective/train/weights_max": 1.039302110671997, + "objective/train/weights_min": 0.9397578835487366, + "theoretical_loss": 6.9337529803906595, + "tokens_seen": 9830400 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003125, + "loss": 4.5038, + "theoretical_loss": 6.85271964810239, + "tokens_seen": 10485760 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.3184737265110016, + "objective/train/docs_used": 18493, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 8.483487129211426, + "objective/train/original_loss": 8.483488082885742, + "objective/train/theoretical_loss": 6.742605135440851, + "objective/train/tokens_used": 31928800, + "objective/train/value_avg": -0.33740234375, + "objective/train/value_loss": 0.10629621148109436, + "objective/train/value_max": -0.33154296875, + "objective/train/value_min": -0.3662109375, + "objective/train/value_reward_corr": -0.10297952204944122, + "objective/train/value_std": 0.0091705322265625, + "objective/train/weight_avg": 1.0323846340179443, + "objective/train/weighted_lm_loss": 8.756911277770996, + "objective/train/weights_max": 1.0371956825256348, + "objective/train/weights_min": 0.951545000076294, + "theoretical_loss": 6.742605135440851, + "tokens_seen": 11468800 + }, + { + "epoch": 0.0, + "learning_rate": 0.00034375, + "loss": 4.3677, + "theoretical_loss": 6.735696451044834, + "tokens_seen": 11534336 + }, + { + "epoch": 0.0, + "learning_rate": 0.000375, + "loss": 4.2178, + "theoretical_loss": 6.631555583663063, + "tokens_seen": 12582912 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.22157348692417145, + "objective/train/docs_used": 19731, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 7.518082618713379, + "objective/train/original_loss": 7.5180816650390625, + "objective/train/theoretical_loss": 6.583564719922174, + "objective/train/tokens_used": 33567200, + "objective/train/value_avg": -0.24755859375, + "objective/train/value_loss": 0.05980129539966583, + "objective/train/value_max": -0.2432861328125, + "objective/train/value_min": -0.260009765625, + "objective/train/value_reward_corr": -0.12625152283501498, + "objective/train/value_std": 0.005336761474609375, + "objective/train/weight_avg": 1.0224583148956299, + "objective/train/weighted_lm_loss": 7.6825385093688965, + "objective/train/weights_max": 1.0261329412460327, + "objective/train/weights_min": 0.9412704706192017, + "theoretical_loss": 6.583564719922174, + "tokens_seen": 13107200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00040625000000000004, + "loss": 4.0661, + "theoretical_loss": 6.537970167599786, + "tokens_seen": 13631488 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004375, + "loss": 3.9731, + "theoretical_loss": 6.453173753851489, + "tokens_seen": 14680064 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.13495579361915588, + "objective/train/docs_used": 20428, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.81293249130249, + "objective/train/original_loss": 6.81293249130249, + "objective/train/theoretical_loss": 6.448132802811353, + "objective/train/tokens_used": 35205600, + "objective/train/value_avg": -0.16796875, + "objective/train/value_loss": 0.03455476462841034, + "objective/train/value_max": -0.1583251953125, + "objective/train/value_min": -0.1903076171875, + "objective/train/value_reward_corr": -0.10985593661659518, + "objective/train/value_std": 0.0102386474609375, + "objective/train/weight_avg": 1.013668179512024, + "objective/train/weighted_lm_loss": 6.900182247161865, + "objective/train/weights_max": 1.0190743207931519, + "objective/train/weights_min": 0.9266189932823181, + "theoretical_loss": 6.448132802811353, + "tokens_seen": 14745600 + }, + { + "epoch": 0.0, + "learning_rate": 0.00046875, + "loss": 3.8828, + "theoretical_loss": 6.3757961938167265, + "tokens_seen": 15728640 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.11925259977579117, + "objective/train/docs_used": 21641, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 7.418591022491455, + "objective/train/original_loss": 7.418591022491455, + "objective/train/theoretical_loss": 6.330712056611843, + "objective/train/tokens_used": 36844000, + "objective/train/value_avg": -0.1263427734375, + "objective/train/value_loss": 0.016260379925370216, + "objective/train/value_max": -0.122314453125, + "objective/train/value_min": -0.13818359375, + "objective/train/value_reward_corr": -0.02309434919506304, + "objective/train/value_std": 0.0021381378173828125, + "objective/train/weight_avg": 1.0120066404342651, + "objective/train/weighted_lm_loss": 7.507042407989502, + "objective/train/weights_max": 1.0137604475021362, + "objective/train/weights_min": 0.9165307283401489, + "theoretical_loss": 6.330712056611843, + "tokens_seen": 16384000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005, + "loss": 3.7787, + "theoretical_loss": 6.304755024141699, + "tokens_seen": 16777216 + }, + { + "epoch": 0.01, + "learning_rate": 0.00053125, + "loss": 3.7272, + "theoretical_loss": 6.239181372596676, + "tokens_seen": 17825792 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.06241800636053085, + "objective/train/docs_used": 22253, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 7.027829647064209, + "objective/train/original_loss": 7.027831077575684, + "objective/train/theoretical_loss": 6.227435356838402, + "objective/train/tokens_used": 38482400, + "objective/train/value_avg": -0.07952880859375, + "objective/train/value_loss": 0.012030147947371006, + "objective/train/value_max": -0.07196044921875, + "objective/train/value_min": -0.0994873046875, + "objective/train/value_reward_corr": -0.020381855624030994, + "objective/train/value_std": 0.006290435791015625, + "objective/train/weight_avg": 1.0063012838363647, + "objective/train/weighted_lm_loss": 7.071319580078125, + "objective/train/weights_max": 1.009782075881958, + "objective/train/weights_min": 0.9132630825042725, + "theoretical_loss": 6.227435356838402, + "tokens_seen": 18022400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005625000000000001, + "loss": 3.6281, + "theoretical_loss": 6.17836807379203, + "tokens_seen": 18874368 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.039723534137010574, + "objective/train/docs_used": 23612, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 7.162580490112305, + "objective/train/original_loss": 7.162580490112305, + "objective/train/theoretical_loss": 6.135527723436086, + "objective/train/tokens_used": 40120800, + "objective/train/value_avg": -0.06085205078125, + "objective/train/value_loss": 0.0085365055128932, + "objective/train/value_max": -0.055206298828125, + "objective/train/value_min": -0.07171630859375, + "objective/train/value_reward_corr": 0.0014386203725383832, + "objective/train/value_std": 0.003086090087890625, + "objective/train/weight_avg": 1.0040143728256226, + "objective/train/weighted_lm_loss": 7.190032482147217, + "objective/train/weights_max": 1.0070847272872925, + "objective/train/weights_min": 0.9106865525245667, + "theoretical_loss": 6.135527723436086, + "tokens_seen": 19660800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00059375, + "loss": 3.5916, + "theoretical_loss": 6.121732500517281, + "tokens_seen": 19922944 + }, + { + "epoch": 0.01, + "learning_rate": 0.000625, + "loss": 3.5583, + "theoretical_loss": 6.068789398864208, + "tokens_seen": 20971520 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.011823360808193684, + "objective/train/docs_used": 24330, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.622986316680908, + "objective/train/original_loss": 6.62298583984375, + "objective/train/theoretical_loss": 6.052935612120018, + "objective/train/tokens_used": 41759200, + "objective/train/value_avg": -0.037628173828125, + "objective/train/value_loss": 0.006172036752104759, + "objective/train/value_max": -0.03424072265625, + "objective/train/value_min": -0.05072021484375, + "objective/train/value_reward_corr": 0.3817076975150332, + "objective/train/value_std": 0.0045623779296875, + "objective/train/weight_avg": 1.0012130737304688, + "objective/train/weighted_lm_loss": 6.640750408172607, + "objective/train/weights_max": 1.0048424005508423, + "objective/train/weights_min": 0.9655836820602417, + "theoretical_loss": 6.052935612120018, + "tokens_seen": 21299200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00065625, + "loss": 3.5518, + "theoretical_loss": 6.019130679617959, + "tokens_seen": 22020096 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.021808357909321785, + "objective/train/docs_used": 24983, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 7.079540729522705, + "objective/train/original_loss": 7.079540729522705, + "objective/train/theoretical_loss": 5.978100075365368, + "objective/train/tokens_used": 43397600, + "objective/train/value_avg": -0.032806396484375, + "objective/train/value_loss": 0.006310055032372475, + "objective/train/value_max": -0.028106689453125, + "objective/train/value_min": -0.042877197265625, + "objective/train/value_reward_corr": -0.08562745693681185, + "objective/train/value_std": 0.0027484893798828125, + "objective/train/weight_avg": 1.0022116899490356, + "objective/train/weighted_lm_loss": 7.095316410064697, + "objective/train/weights_max": 1.0041011571884155, + "objective/train/weights_min": 0.9098527431488037, + "theoretical_loss": 5.978100075365368, + "tokens_seen": 22937600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006875, + "loss": 3.5566, + "theoretical_loss": 5.972410144955672, + "tokens_seen": 23068672 + }, + { + "epoch": 0.01, + "learning_rate": 0.00071875, + "loss": 3.5694, + "theoretical_loss": 5.928331780102846, + "tokens_seen": 24117248 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.02006148360669613, + "objective/train/docs_used": 26439, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.832540988922119, + "objective/train/original_loss": 6.832540512084961, + "objective/train/theoretical_loss": 5.909811913378293, + "objective/train/tokens_used": 45036000, + "objective/train/value_avg": -0.03369140625, + "objective/train/value_loss": 0.0028878767043352127, + "objective/train/value_max": -0.0224151611328125, + "objective/train/value_min": -0.043365478515625, + "objective/train/value_reward_corr": 0.0173575966478515, + "objective/train/value_std": 0.00457763671875, + "objective/train/weight_avg": 1.0020204782485962, + "objective/train/weighted_lm_loss": 6.845198154449463, + "objective/train/weights_max": 1.0042790174484253, + "objective/train/weights_min": 0.9273841381072998, + "theoretical_loss": 5.909811913378293, + "tokens_seen": 24576000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00075, + "loss": 3.5475, + "theoretical_loss": 5.886640662049593, + "tokens_seen": 25165824 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.031742192804813385, + "objective/train/docs_used": 27060, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 7.251229763031006, + "objective/train/original_loss": 7.251229763031006, + "objective/train/theoretical_loss": 5.847115817761683, + "objective/train/tokens_used": 46674400, + "objective/train/value_avg": -0.041107177734375, + "objective/train/value_loss": 0.003610246814787388, + "objective/train/value_max": -0.021820068359375, + "objective/train/value_min": -0.0604248046875, + "objective/train/value_reward_corr": -0.04322390677624416, + "objective/train/value_std": 0.00981903076171875, + "objective/train/weight_avg": 1.0031920671463013, + "objective/train/weighted_lm_loss": 7.275233268737793, + "objective/train/weights_max": 1.0059854984283447, + "objective/train/weights_min": 0.9150698184967041, + "theoretical_loss": 5.847115817761683, + "tokens_seen": 26214400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00078125, + "loss": 3.5396, + "theoretical_loss": 5.847115817761683, + "tokens_seen": 26214400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008125000000000001, + "loss": 3.5166, + "theoretical_loss": 5.809564554032628, + "tokens_seen": 27262976 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.02137875184416771, + "objective/train/docs_used": 28453, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.915349006652832, + "objective/train/original_loss": 6.91534948348999, + "objective/train/theoretical_loss": 5.789244981090448, + "objective/train/tokens_used": 48312800, + "objective/train/value_avg": -0.0248260498046875, + "objective/train/value_loss": 0.0009062372264452279, + "objective/train/value_max": -0.017913818359375, + "objective/train/value_min": -0.05645751953125, + "objective/train/value_reward_corr": -0.010075610371550763, + "objective/train/value_std": 0.0089263916015625, + "objective/train/weight_avg": 1.0021424293518066, + "objective/train/weighted_lm_loss": 6.9283833503723145, + "objective/train/weights_max": 1.0055909156799316, + "objective/train/weights_min": 0.9606239795684814, + "theoretical_loss": 5.789244981090448, + "tokens_seen": 27852800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00084375, + "loss": 3.507, + "theoretical_loss": 5.773817911814618, + "tokens_seen": 28311552 + }, + { + "epoch": 0.01, + "learning_rate": 0.000875, + "loss": 3.51, + "theoretical_loss": 5.739726989373027, + "tokens_seen": 29360128 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.03898964449763298, + "objective/train/docs_used": 29115, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.931723117828369, + "objective/train/original_loss": 6.931723117828369, + "objective/train/theoretical_loss": 5.735575307377884, + "objective/train/tokens_used": 49951200, + "objective/train/value_avg": -0.045440673828125, + "objective/train/value_loss": 0.0030183629132807255, + "objective/train/value_max": -0.019866943359375, + "objective/train/value_min": -0.15966796875, + "objective/train/value_reward_corr": 0.13322838566373024, + "objective/train/value_std": 0.02838134765625, + "objective/train/weight_avg": 1.0039141178131104, + "objective/train/weighted_lm_loss": 6.954873085021973, + "objective/train/weights_max": 1.0149493217468262, + "objective/train/weights_min": 0.9178234338760376, + "theoretical_loss": 5.735575307377884, + "tokens_seen": 29491200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00090625, + "loss": 3.5118, + "theoretical_loss": 5.707159943639638, + "tokens_seen": 30408704 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.023591022938489914, + "objective/train/docs_used": 30152, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.621243476867676, + "objective/train/original_loss": 6.621243953704834, + "objective/train/theoretical_loss": 5.685592609548186, + "objective/train/tokens_used": 51589600, + "objective/train/value_avg": -0.026123046875, + "objective/train/value_loss": 0.0006534153944812715, + "objective/train/value_max": -0.0165252685546875, + "objective/train/value_min": -0.040618896484375, + "objective/train/value_reward_corr": 0.015822645892606124, + "objective/train/value_std": 0.00655364990234375, + "objective/train/weight_avg": 1.0023622512817383, + "objective/train/weighted_lm_loss": 6.636758804321289, + "objective/train/weights_max": 1.0040148496627808, + "objective/train/weights_min": 0.9891030788421631, + "theoretical_loss": 5.685592609548186, + "tokens_seen": 31129600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009375, + "loss": 3.5433, + "theoretical_loss": 5.6759995259903135, + "tokens_seen": 31457280 + }, + { + "epoch": 0.01, + "learning_rate": 0.00096875, + "loss": 3.4098, + "theoretical_loss": 5.646141042853927, + "tokens_seen": 32505856 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.01788710057735443, + "objective/train/docs_used": 30758, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.741631031036377, + "objective/train/original_loss": 6.741631031036377, + "objective/train/theoretical_loss": 5.638868635567113, + "objective/train/tokens_used": 53228000, + "objective/train/value_avg": -0.027069091796875, + "objective/train/value_loss": 0.00418446259573102, + "objective/train/value_max": -0.010986328125, + "objective/train/value_min": -0.0548095703125, + "objective/train/value_reward_corr": 0.021505445902430306, + "objective/train/value_std": 0.0124969482421875, + "objective/train/weight_avg": 1.0018092393875122, + "objective/train/weighted_lm_loss": 6.753862380981445, + "objective/train/weights_max": 1.0054353475570679, + "objective/train/weights_min": 0.9259672164916992, + "theoretical_loss": 5.638868635567113, + "tokens_seen": 32768000 + }, + { + "epoch": 0.01, + "learning_rate": 0.001, + "loss": 3.4767, + "theoretical_loss": 5.61749065678978, + "tokens_seen": 33554432 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": -0.00287991832010448, + "objective/train/docs_used": 31355, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 7.167684078216553, + "objective/train/original_loss": 7.167685508728027, + "objective/train/theoretical_loss": 5.595043233700797, + "objective/train/tokens_used": 54866400, + "objective/train/value_avg": -0.0279388427734375, + "objective/train/value_loss": 0.015509962104260921, + "objective/train/value_max": -0.01094818115234375, + "objective/train/value_min": -0.0616455078125, + "objective/train/value_reward_corr": -0.14302327490790223, + "objective/train/value_std": 0.01528167724609375, + "objective/train/weight_avg": 0.9997876286506653, + "objective/train/weighted_lm_loss": 7.168863773345947, + "objective/train/weights_max": 1.0061205625534058, + "objective/train/weights_min": 0.9072225689888, + "theoretical_loss": 5.595043233700797, + "tokens_seen": 34406400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009996789727126805, + "loss": 3.4588, + "theoretical_loss": 5.589963962496837, + "tokens_seen": 34603008 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009993579454253612, + "loss": 3.4756, + "theoretical_loss": 5.563484786407864, + "tokens_seen": 35651584 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": -0.005041239783167839, + "objective/train/docs_used": 32385, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.588967800140381, + "objective/train/original_loss": 6.588967323303223, + "objective/train/theoretical_loss": 5.553810873340668, + "objective/train/tokens_used": 56504800, + "objective/train/value_avg": -0.018341064453125, + "objective/train/value_loss": 0.012799476273357868, + "objective/train/value_max": -0.01107025146484375, + "objective/train/value_min": -0.054290771484375, + "objective/train/value_reward_corr": 0.003019610362779838, + "objective/train/value_std": 0.008056640625, + "objective/train/weight_avg": 0.9995582699775696, + "objective/train/weighted_lm_loss": 6.584326267242432, + "objective/train/weights_max": 1.0052070617675781, + "objective/train/weights_min": 0.9082019925117493, + "theoretical_loss": 5.553810873340668, + "tokens_seen": 36044800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009990369181380417, + "loss": 3.4678, + "theoretical_loss": 5.537984169317745, + "tokens_seen": 36700160 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.02525314874947071, + "objective/train/docs_used": 32883, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 7.260111331939697, + "objective/train/original_loss": 7.2601118087768555, + "objective/train/theoretical_loss": 5.514910312017166, + "objective/train/tokens_used": 58143200, + "objective/train/value_avg": -0.04998779296875, + "objective/train/value_loss": 0.005380138289183378, + "objective/train/value_max": -0.0121917724609375, + "objective/train/value_min": -0.12493896484375, + "objective/train/value_reward_corr": -0.1868435562519545, + "objective/train/value_std": 0.034210205078125, + "objective/train/weight_avg": 1.0025521516799927, + "objective/train/weighted_lm_loss": 7.280068397521973, + "objective/train/weights_max": 1.0124826431274414, + "objective/train/weights_min": 0.9521530270576477, + "theoretical_loss": 5.514910312017166, + "tokens_seen": 37683200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009987158908507222, + "loss": 3.5616, + "theoretical_loss": 5.513399499781949, + "tokens_seen": 37748736 + }, + { + "epoch": 0.01, + "learning_rate": 0.000998394863563403, + "loss": 3.4846, + "theoretical_loss": 5.4896737724343065, + "tokens_seen": 38797312 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": -0.004549255594611168, + "objective/train/docs_used": 34166, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.949094295501709, + "objective/train/original_loss": 6.949094295501709, + "objective/train/theoretical_loss": 5.478116572052198, + "objective/train/tokens_used": 59781600, + "objective/train/value_avg": -0.0161285400390625, + "objective/train/value_loss": 0.009610316716134548, + "objective/train/value_max": -0.007755279541015625, + "objective/train/value_min": -0.04052734375, + "objective/train/value_reward_corr": -0.03573522790849465, + "objective/train/value_std": 0.00679779052734375, + "objective/train/weight_avg": 0.9995920062065125, + "objective/train/weighted_lm_loss": 6.945201873779297, + "objective/train/weights_max": 1.0038763284683228, + "objective/train/weights_min": 0.9060330390930176, + "theoretical_loss": 5.478116572052198, + "tokens_seen": 39321600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009980738362760835, + "loss": 3.4745, + "theoretical_loss": 5.466754950375066, + "tokens_seen": 39845888 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009977528089887642, + "loss": 3.4357, + "theoretical_loss": 5.444595414709691, + "tokens_seen": 40894464 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.012062415480613708, + "objective/train/docs_used": 34866, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.9644856452941895, + "objective/train/original_loss": 6.964486122131348, + "objective/train/theoretical_loss": 5.443234637702833, + "objective/train/tokens_used": 61420000, + "objective/train/value_avg": -0.0208740234375, + "objective/train/value_loss": 0.0013184717390686274, + "objective/train/value_max": -0.0118255615234375, + "objective/train/value_min": -0.0310211181640625, + "objective/train/value_reward_corr": 0.009720149519840134, + "objective/train/value_std": 0.003765106201171875, + "objective/train/weight_avg": 1.0012128353118896, + "objective/train/weighted_lm_loss": 6.972752571105957, + "objective/train/weights_max": 1.0030404329299927, + "objective/train/weights_min": 0.954939603805542, + "theoretical_loss": 5.443234637702833, + "tokens_seen": 40960000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009974317817014447, + "loss": 3.3889, + "theoretical_loss": 5.423151487427312, + "tokens_seen": 41943040 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": -0.011038665659725666, + "objective/train/docs_used": 35868, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.3744611740112305, + "objective/train/original_loss": 6.3744611740112305, + "objective/train/theoretical_loss": 5.410094451075121, + "objective/train/tokens_used": 63058400, + "objective/train/value_avg": -0.023406982421875, + "objective/train/value_loss": 0.01132715679705143, + "objective/train/value_max": -0.01061248779296875, + "objective/train/value_min": -0.04168701171875, + "objective/train/value_reward_corr": 0.004592911286733317, + "objective/train/value_std": 0.0074005126953125, + "objective/train/weight_avg": 0.9989519119262695, + "objective/train/weighted_lm_loss": 6.376488208770752, + "objective/train/weights_max": 1.0040754079818726, + "objective/train/weights_min": 0.912762463092804, + "theoretical_loss": 5.410094451075121, + "tokens_seen": 42598400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009971107544141252, + "loss": 3.4122, + "theoretical_loss": 5.402383016282556, + "tokens_seen": 42991616 + }, + { + "epoch": 0.01, + "learning_rate": 0.000996789727126806, + "loss": 3.3889, + "theoretical_loss": 5.382253012327088, + "tokens_seen": 44040192 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": -0.021652761846780777, + "objective/train/docs_used": 36544, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.387622833251953, + "objective/train/original_loss": 6.387624263763428, + "objective/train/theoretical_loss": 5.3785469004263184, + "objective/train/tokens_used": 64696800, + "objective/train/value_avg": -0.0181884765625, + "objective/train/value_loss": 0.006576833315193653, + "objective/train/value_max": -0.007785797119140625, + "objective/train/value_min": -0.12347412109375, + "objective/train/value_reward_corr": -0.21017913918060144, + "objective/train/value_std": 0.01186370849609375, + "objective/train/weight_avg": 0.9978673458099365, + "objective/train/weighted_lm_loss": 6.3721137046813965, + "objective/train/weights_max": 1.0123697519302368, + "objective/train/weights_min": 0.9132270216941833, + "theoretical_loss": 5.3785469004263184, + "tokens_seen": 44236800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009964686998394864, + "loss": 3.361, + "theoretical_loss": 5.362727332334602, + "tokens_seen": 45088768 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": -0.018998241052031517, + "objective/train/docs_used": 37915, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.239251136779785, + "objective/train/original_loss": 6.239251136779785, + "objective/train/theoretical_loss": 5.348460575231594, + "objective/train/tokens_used": 66335200, + "objective/train/value_avg": -0.0169830322265625, + "objective/train/value_loss": 0.02853401005268097, + "objective/train/value_max": -0.007785797119140625, + "objective/train/value_min": -0.08758544921875, + "objective/train/value_reward_corr": -0.00881444303727045, + "objective/train/value_std": 0.00702667236328125, + "objective/train/weight_avg": 0.9982386231422424, + "objective/train/weighted_lm_loss": 6.224942684173584, + "objective/train/weights_max": 1.0087311267852783, + "objective/train/weights_min": 0.9059091210365295, + "theoretical_loss": 5.348460575231594, + "tokens_seen": 45875200 + }, + { + "epoch": 0.01, + "learning_rate": 0.000996147672552167, + "loss": 3.3833, + "theoretical_loss": 5.343774399657292, + "tokens_seen": 46137344 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009958266452648476, + "loss": 3.3761, + "theoretical_loss": 5.325364958105951, + "tokens_seen": 47185920 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.0032461965456604958, + "objective/train/docs_used": 38651, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.5479736328125, + "objective/train/original_loss": 6.547974586486816, + "objective/train/theoretical_loss": 5.319719119782715, + "objective/train/tokens_used": 67973600, + "objective/train/value_avg": -0.02008056640625, + "objective/train/value_loss": 0.005591921042650938, + "objective/train/value_max": -0.01116180419921875, + "objective/train/value_min": -0.52734375, + "objective/train/value_reward_corr": 0.011861338254863883, + "objective/train/value_std": 0.01087188720703125, + "objective/train/weight_avg": 1.000351905822754, + "objective/train/weighted_lm_loss": 6.549544811248779, + "objective/train/weights_max": 1.0539733171463013, + "objective/train/weights_min": 0.9124422669410706, + "theoretical_loss": 5.319719119782715, + "tokens_seen": 47513600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009955056179775281, + "loss": 3.3715, + "theoretical_loss": 5.307471854308661, + "tokens_seen": 48234496 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": -0.04573943465948105, + "objective/train/docs_used": 39958, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.967624664306641, + "objective/train/original_loss": 6.967625141143799, + "objective/train/theoretical_loss": 5.292219058433327, + "objective/train/tokens_used": 69612000, + "objective/train/value_avg": -0.02325439453125, + "objective/train/value_loss": 0.05067373067140579, + "objective/train/value_max": -0.01090240478515625, + "objective/train/value_min": -0.465576171875, + "objective/train/value_reward_corr": 0.03243802324435401, + "objective/train/value_std": 0.01270294189453125, + "objective/train/weight_avg": 0.9956722259521484, + "objective/train/weighted_lm_loss": 6.9361958503723145, + "objective/train/weights_max": 1.04741370677948, + "objective/train/weights_min": 0.9062807559967041, + "theoretical_loss": 5.292219058433327, + "tokens_seen": 49152000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009951845906902086, + "loss": 3.3204, + "theoretical_loss": 5.290069844712654, + "tokens_seen": 49283072 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009948635634028894, + "loss": 3.3215, + "theoretical_loss": 5.273135423980159, + "tokens_seen": 50331648 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": -0.04157426208257675, + "objective/train/docs_used": 40584, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.215948581695557, + "objective/train/original_loss": 6.215948104858398, + "objective/train/theoretical_loss": 5.265867995771362, + "objective/train/tokens_used": 71250400, + "objective/train/value_avg": -0.0168609619140625, + "objective/train/value_loss": 0.03253501281142235, + "objective/train/value_max": -0.00821685791015625, + "objective/train/value_min": -0.05059814453125, + "objective/train/value_reward_corr": -0.13533021460202396, + "objective/train/value_std": 0.007175445556640625, + "objective/train/weight_avg": 0.9960008859634399, + "objective/train/weighted_lm_loss": 6.197579860687256, + "objective/train/weights_max": 1.0050069093704224, + "objective/train/weights_min": 0.9068111181259155, + "theoretical_loss": 5.265867995771362, + "tokens_seen": 50790400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009945425361155699, + "loss": 3.2891, + "theoretical_loss": 5.256646672015468, + "tokens_seen": 51380224 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.0031608380377292633, + "objective/train/docs_used": 41941, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.38153600692749, + "objective/train/original_loss": 6.381536483764648, + "objective/train/theoretical_loss": 5.240583117265738, + "objective/train/tokens_used": 72888800, + "objective/train/value_avg": -0.0150146484375, + "objective/train/value_loss": 0.00558395404368639, + "objective/train/value_max": -0.006931304931640625, + "objective/train/value_min": -0.03875732421875, + "objective/train/value_reward_corr": -0.060091902970768475, + "objective/train/value_std": 0.007183074951171875, + "objective/train/weight_avg": 1.0003433227539062, + "objective/train/weighted_lm_loss": 6.385217666625977, + "objective/train/weights_max": 1.003819227218628, + "objective/train/weights_min": 0.9118895530700684, + "theoretical_loss": 5.240583117265738, + "tokens_seen": 52428800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009942215088282504, + "loss": 3.3014, + "theoretical_loss": 5.240583117265738, + "tokens_seen": 52428800 + }, + { + "epoch": 0.02, + "learning_rate": 0.000993900481540931, + "loss": 3.2814, + "theoretical_loss": 5.224925614276991, + "tokens_seen": 53477376 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.013915037736296654, + "objective/train/docs_used": 42708, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.6965765953063965, + "objective/train/original_loss": 6.696576118469238, + "objective/train/theoretical_loss": 5.2162899325491265, + "objective/train/tokens_used": 74527200, + "objective/train/value_avg": -0.0181732177734375, + "objective/train/value_loss": 0.001287174178287387, + "objective/train/value_max": -0.00800323486328125, + "objective/train/value_min": -0.06488037109375, + "objective/train/value_reward_corr": -0.005727689767209602, + "objective/train/value_std": 0.0104217529296875, + "objective/train/weight_avg": 1.001397728919983, + "objective/train/weighted_lm_loss": 6.706601619720459, + "objective/train/weights_max": 1.00644052028656, + "objective/train/weights_min": 0.9058850407600403, + "theoretical_loss": 5.2162899325491265, + "tokens_seen": 54067200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009935794542536116, + "loss": 3.3082, + "theoretical_loss": 5.209656233771442, + "tokens_seen": 54525952 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009932584269662923, + "loss": 3.2638, + "theoretical_loss": 5.194758163752068, + "tokens_seen": 55574528 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": -0.00944122951477766, + "objective/train/docs_used": 44116, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.814762115478516, + "objective/train/original_loss": 6.814761638641357, + "objective/train/theoretical_loss": 5.192921216021549, + "objective/train/tokens_used": 76165600, + "objective/train/value_avg": -0.022735595703125, + "objective/train/value_loss": 0.020141715183854103, + "objective/train/value_max": -0.00864410400390625, + "objective/train/value_min": -0.06817626953125, + "objective/train/value_reward_corr": 0.026943394173354698, + "objective/train/value_std": 0.0135345458984375, + "objective/train/weight_avg": 0.9991538524627686, + "objective/train/weighted_lm_loss": 6.80916690826416, + "objective/train/weights_max": 1.0067789554595947, + "objective/train/weights_min": 0.905930757522583, + "theoretical_loss": 5.192921216021549, + "tokens_seen": 55705600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009929373996789728, + "loss": 3.2635, + "theoretical_loss": 5.180215620343211, + "tokens_seen": 56623104 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": -0.002651343122124672, + "objective/train/docs_used": 44698, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.204342842102051, + "objective/train/original_loss": 6.204342842102051, + "objective/train/theoretical_loss": 5.1704161089874425, + "objective/train/tokens_used": 77804000, + "objective/train/value_avg": -0.0198211669921875, + "objective/train/value_loss": 0.007966230623424053, + "objective/train/value_max": -0.0074310302734375, + "objective/train/value_min": -0.060089111328125, + "objective/train/value_reward_corr": -0.10258974701201411, + "objective/train/value_std": 0.01080322265625, + "objective/train/weight_avg": 0.9997740983963013, + "objective/train/weighted_lm_loss": 6.200537204742432, + "objective/train/weights_max": 1.005968451499939, + "objective/train/weights_min": 0.913537323474884, + "theoretical_loss": 5.1704161089874425, + "tokens_seen": 57344000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009926163723916533, + "loss": 3.2638, + "theoretical_loss": 5.166013767248007, + "tokens_seen": 57671680 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009922953451043338, + "loss": 3.2241, + "theoretical_loss": 5.152138642849951, + "tokens_seen": 58720256 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.009609945118427277, + "objective/train/docs_used": 46033, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.134335517883301, + "objective/train/original_loss": 6.134335517883301, + "objective/train/theoretical_loss": 5.148719354852201, + "objective/train/tokens_used": 79442400, + "objective/train/value_avg": -0.0167083740234375, + "objective/train/value_loss": 0.0011384186800569296, + "objective/train/value_max": -0.00634002685546875, + "objective/train/value_min": -0.046539306640625, + "objective/train/value_reward_corr": -0.055066129136898075, + "objective/train/value_std": 0.0073394775390625, + "objective/train/weight_avg": 1.0009666681289673, + "objective/train/weighted_lm_loss": 6.139864921569824, + "objective/train/weights_max": 1.0045959949493408, + "objective/train/weights_min": 0.9241358637809753, + "theoretical_loss": 5.148719354852201, + "tokens_seen": 58982400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009919743178170143, + "loss": 3.1989, + "theoretical_loss": 5.138577094110906, + "tokens_seen": 59768832 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.004386140964925289, + "objective/train/docs_used": 46630, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.90932559967041, + "objective/train/original_loss": 6.90932559967041, + "objective/train/theoretical_loss": 5.127780644563984, + "objective/train/tokens_used": 81080800, + "objective/train/value_avg": -0.019073486328125, + "objective/train/value_loss": 0.002533550374209881, + "objective/train/value_max": -0.0074310302734375, + "objective/train/value_min": -0.04620361328125, + "objective/train/value_reward_corr": -0.05623726184561407, + "objective/train/value_std": 0.0089263916015625, + "objective/train/weight_avg": 1.0004510879516602, + "objective/train/weighted_lm_loss": 6.913048267364502, + "objective/train/weights_max": 1.004174828529358, + "objective/train/weights_min": 0.9255488514900208, + "theoretical_loss": 5.127780644563984, + "tokens_seen": 60620800 + }, + { + "epoch": 0.02, + "learning_rate": 0.000991653290529695, + "loss": 3.2541, + "theoretical_loss": 5.12531671652499, + "tokens_seen": 60817408 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009913322632423755, + "loss": 3.1728, + "theoretical_loss": 5.112345799479678, + "tokens_seen": 61865984 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.0015623958315700293, + "objective/train/docs_used": 47896, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.781630039215088, + "objective/train/original_loss": 6.78162956237793, + "objective/train/theoretical_loss": 5.107554053900861, + "objective/train/tokens_used": 82719200, + "objective/train/value_avg": -0.0204620361328125, + "objective/train/value_loss": 0.007244364824146032, + "objective/train/value_max": -0.00861358642578125, + "objective/train/value_min": -0.1217041015625, + "objective/train/value_reward_corr": -0.0019938466249876984, + "objective/train/value_std": 0.00986480712890625, + "objective/train/weight_avg": 1.0001916885375977, + "objective/train/weighted_lm_loss": 6.784290313720703, + "objective/train/weights_max": 1.01212739944458, + "objective/train/weights_min": 0.9068973660469055, + "theoretical_loss": 5.107554053900861, + "tokens_seen": 62259200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009910112359550563, + "loss": 3.2195, + "theoretical_loss": 5.0996532764547, + "tokens_seen": 62914560 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.003353612031787634, + "objective/train/docs_used": 48551, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.390549659729004, + "objective/train/original_loss": 6.390549659729004, + "objective/train/theoretical_loss": 5.087997557671262, + "objective/train/tokens_used": 84357600, + "objective/train/value_avg": -0.0200653076171875, + "objective/train/value_loss": 0.011525045149028301, + "objective/train/value_max": -0.0074310302734375, + "objective/train/value_min": -0.057281494140625, + "objective/train/value_reward_corr": -0.03604430846394821, + "objective/train/value_std": 0.0093536376953125, + "objective/train/weight_avg": 1.0003914833068848, + "objective/train/weighted_lm_loss": 6.392732620239258, + "objective/train/weights_max": 1.0054819583892822, + "objective/train/weights_min": 0.907982587814331, + "theoretical_loss": 5.087997557671262, + "tokens_seen": 63897600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009906902086677368, + "loss": 3.1861, + "theoretical_loss": 5.087228679557634, + "tokens_seen": 63963136 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009903691813804173, + "loss": 3.2066, + "theoretical_loss": 5.075062097954335, + "tokens_seen": 65011712 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.007881049066781998, + "objective/train/docs_used": 49128, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.5159382820129395, + "objective/train/original_loss": 6.515939235687256, + "objective/train/theoretical_loss": 5.069072608639006, + "objective/train/tokens_used": 85996000, + "objective/train/value_avg": -0.0191802978515625, + "objective/train/value_loss": 0.004255206324160099, + "objective/train/value_max": -0.00634002685546875, + "objective/train/value_min": -0.11083984375, + "objective/train/value_reward_corr": 0.007194444271940113, + "objective/train/value_std": 0.0086517333984375, + "objective/train/weight_avg": 1.0008089542388916, + "objective/train/weighted_lm_loss": 6.521170616149902, + "objective/train/weights_max": 1.0110142230987549, + "objective/train/weights_min": 0.9068260788917542, + "theoretical_loss": 5.069072608639006, + "tokens_seen": 65536000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000990048154093098, + "loss": 3.1706, + "theoretical_loss": 5.063144139803664, + "tokens_seen": 66060288 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009897271268057785, + "loss": 3.1891, + "theoretical_loss": 5.051465897350656, + "tokens_seen": 67108864 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.006186979822814465, + "objective/train/docs_used": 50658, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.942707538604736, + "objective/train/original_loss": 5.942707538604736, + "objective/train/theoretical_loss": 5.0507437711683, + "objective/train/tokens_used": 87634400, + "objective/train/value_avg": -0.0178985595703125, + "objective/train/value_loss": 0.005681105423718691, + "objective/train/value_max": -0.0074615478515625, + "objective/train/value_min": -0.053009033203125, + "objective/train/value_reward_corr": 0.028218477385015803, + "objective/train/value_std": 0.0084686279296875, + "objective/train/weight_avg": 1.0006463527679443, + "objective/train/weighted_lm_loss": 5.946798801422119, + "objective/train/weights_max": 1.0052566528320312, + "objective/train/weights_min": 0.9061599373817444, + "theoretical_loss": 5.0507437711683, + "tokens_seen": 67174400 + }, + { + "epoch": 0.02, + "learning_rate": 0.000989406099518459, + "loss": 3.173, + "theoretical_loss": 5.040018914871285, + "tokens_seen": 68157440 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.004808951169252396, + "objective/train/docs_used": 51428, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.299284934997559, + "objective/train/original_loss": 6.299285411834717, + "objective/train/theoretical_loss": 5.032978401333766, + "objective/train/tokens_used": 89272800, + "objective/train/value_avg": -0.0190277099609375, + "objective/train/value_loss": 0.004322511609643698, + "objective/train/value_max": -0.0060272216796875, + "objective/train/value_min": -0.0970458984375, + "objective/train/value_reward_corr": 0.02529551962025344, + "objective/train/value_std": 0.0084686279296875, + "objective/train/weight_avg": 1.000502109527588, + "objective/train/weighted_lm_loss": 6.302114963531494, + "objective/train/weights_max": 1.0096710920333862, + "objective/train/weights_min": 0.9127773642539978, + "theoretical_loss": 5.032978401333766, + "tokens_seen": 68812800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009890850722311397, + "loss": 3.1813, + "theoretical_loss": 5.028795159195919, + "tokens_seen": 69206016 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009887640449438202, + "loss": 3.1412, + "theoretical_loss": 5.01778699256848, + "tokens_seen": 70254592 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.006006576120853424, + "objective/train/docs_used": 52769, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.121641159057617, + "objective/train/original_loss": 6.121641635894775, + "objective/train/theoretical_loss": 5.015746366650382, + "objective/train/tokens_used": 90911200, + "objective/train/value_avg": -0.0188140869140625, + "objective/train/value_loss": 0.006015362683683634, + "objective/train/value_max": -0.0062408447265625, + "objective/train/value_min": -0.048492431640625, + "objective/train/value_reward_corr": 0.01982207008723441, + "objective/train/value_std": 0.00812530517578125, + "objective/train/weight_avg": 1.0006300210952759, + "objective/train/weighted_lm_loss": 6.12559700012207, + "objective/train/weights_max": 1.0046688318252563, + "objective/train/weights_min": 0.9066880941390991, + "theoretical_loss": 5.015746366650382, + "tokens_seen": 70451200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009884430176565007, + "loss": 3.1191, + "theoretical_loss": 5.006987147624395, + "tokens_seen": 71303168 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": -0.013219057582318783, + "objective/train/docs_used": 53229, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.057215213775635, + "objective/train/original_loss": 6.057216167449951, + "objective/train/theoretical_loss": 4.999019799720424, + "objective/train/tokens_used": 92549600, + "objective/train/value_avg": -0.0201416015625, + "objective/train/value_loss": 0.017483025789260864, + "objective/train/value_max": -0.00701141357421875, + "objective/train/value_min": -0.11102294921875, + "objective/train/value_reward_corr": -0.015870052531823987, + "objective/train/value_std": 0.01010894775390625, + "objective/train/weight_avg": 0.9987632036209106, + "objective/train/weighted_lm_loss": 6.046870708465576, + "objective/train/weights_max": 1.0089390277862549, + "objective/train/weights_min": 0.9061722159385681, + "theoretical_loss": 4.999019799720424, + "tokens_seen": 72089600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009881219903691814, + "loss": 3.1747, + "theoretical_loss": 4.996388704293487, + "tokens_seen": 72351744 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987800963081862, + "loss": 3.1738, + "theoretical_loss": 4.985985068454193, + "tokens_seen": 73400320 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.012800940312445164, + "objective/train/docs_used": 53865, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.9091997146606445, + "objective/train/original_loss": 6.9091997146606445, + "objective/train/theoretical_loss": 4.982772881024813, + "objective/train/tokens_used": 94188000, + "objective/train/value_avg": -0.0201568603515625, + "objective/train/value_loss": 0.001971928169950843, + "objective/train/value_max": -0.0063629150390625, + "objective/train/value_min": -0.07464599609375, + "objective/train/value_reward_corr": -0.00448257528471459, + "objective/train/value_std": 0.0092926025390625, + "objective/train/weight_avg": 1.001289963722229, + "objective/train/weighted_lm_loss": 6.918591022491455, + "objective/train/weights_max": 1.0074265003204346, + "objective/train/weights_min": 0.9441697597503662, + "theoretical_loss": 4.982772881024813, + "tokens_seen": 73728000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009874799357945426, + "loss": 3.1497, + "theoretical_loss": 4.9757699521834, + "tokens_seen": 74448896 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.012019794434309006, + "objective/train/docs_used": 54881, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.742319107055664, + "objective/train/original_loss": 6.742319107055664, + "objective/train/theoretical_loss": 4.966981646847723, + "objective/train/tokens_used": 95826400, + "objective/train/value_avg": -0.0206756591796875, + "objective/train/value_loss": 0.0014446404529735446, + "objective/train/value_max": -0.006717681884765625, + "objective/train/value_min": -0.06109619140625, + "objective/train/value_reward_corr": 0.02044586238741301, + "objective/train/value_std": 0.00949859619140625, + "objective/train/weight_avg": 1.0012091398239136, + "objective/train/weighted_lm_loss": 6.750966548919678, + "objective/train/weights_max": 1.006060004234314, + "objective/train/weights_min": 0.9146670699119568, + "theoretical_loss": 4.966981646847723, + "tokens_seen": 75366400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009871589085072231, + "loss": 3.1452, + "theoretical_loss": 4.96573735546202, + "tokens_seen": 75497472 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009868378812199036, + "loss": 3.1248, + "theoretical_loss": 4.955881549210428, + "tokens_seen": 76546048 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.0012432209914550185, + "objective/train/docs_used": 55424, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.347426891326904, + "objective/train/original_loss": 6.347426891326904, + "objective/train/theoretical_loss": 4.951623818949572, + "objective/train/tokens_used": 97464800, + "objective/train/value_avg": -0.0189666748046875, + "objective/train/value_loss": 0.007152511738240719, + "objective/train/value_max": -0.00621795654296875, + "objective/train/value_min": -0.07421875, + "objective/train/value_reward_corr": -0.047807793808145295, + "objective/train/value_std": 0.0100555419921875, + "objective/train/weight_avg": 1.0001593828201294, + "objective/train/weighted_lm_loss": 6.3489670753479, + "objective/train/weights_max": 1.0068278312683105, + "objective/train/weights_min": 0.9210269451141357, + "theoretical_loss": 4.951623818949572, + "tokens_seen": 77004800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009865168539325844, + "loss": 3.1423, + "theoretical_loss": 4.946197059540362, + "tokens_seen": 77594624 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": -0.007122819311916828, + "objective/train/docs_used": 56825, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.567651748657227, + "objective/train/original_loss": 6.567652702331543, + "objective/train/theoretical_loss": 4.936678653120895, + "objective/train/tokens_used": 99103200, + "objective/train/value_avg": -0.01953125, + "objective/train/value_loss": 0.017145812511444092, + "objective/train/value_max": -0.005321502685546875, + "objective/train/value_min": -0.090576171875, + "objective/train/value_reward_corr": -0.07937142989408875, + "objective/train/value_std": 0.01285552978515625, + "objective/train/weight_avg": 0.9993711709976196, + "objective/train/weighted_lm_loss": 6.564722061157227, + "objective/train/weights_max": 1.008908987045288, + "objective/train/weights_min": 0.9076371788978577, + "theoretical_loss": 4.936678653120895, + "tokens_seen": 78643200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009861958266452649, + "loss": 3.0869, + "theoretical_loss": 4.936678653120895, + "tokens_seen": 78643200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009858747993579454, + "loss": 3.1132, + "theoretical_loss": 4.927321323566017, + "tokens_seen": 79691776 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.010361924767494202, + "objective/train/docs_used": 57296, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.390318393707275, + "objective/train/original_loss": 6.390318870544434, + "objective/train/theoretical_loss": 4.9221268041788555, + "objective/train/tokens_used": 100741600, + "objective/train/value_avg": -0.021087646484375, + "objective/train/value_loss": 0.002562181791290641, + "objective/train/value_max": -0.00536346435546875, + "objective/train/value_min": -0.12445068359375, + "objective/train/value_reward_corr": -0.007551297561926321, + "objective/train/value_std": 0.01218414306640625, + "objective/train/weight_avg": 1.0010489225387573, + "objective/train/weighted_lm_loss": 6.397782802581787, + "objective/train/weights_max": 1.0124413967132568, + "objective/train/weights_min": 0.9482517242431641, + "theoretical_loss": 4.9221268041788555, + "tokens_seen": 80281600 + }, + { + "epoch": 0.02, + "learning_rate": 0.000985553772070626, + "loss": 3.0975, + "theoretical_loss": 4.918120278760069, + "tokens_seen": 80740352 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009852327447833066, + "loss": 3.1094, + "theoretical_loss": 4.909070929045194, + "tokens_seen": 81788928 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": -0.005871062632650137, + "objective/train/docs_used": 58605, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.5089311599731445, + "objective/train/original_loss": 6.5089311599731445, + "objective/train/theoretical_loss": 4.907950205325841, + "objective/train/tokens_used": 102380000, + "objective/train/value_avg": -0.0189361572265625, + "objective/train/value_loss": 0.008267113007605076, + "objective/train/value_max": -0.004039764404296875, + "objective/train/value_min": -0.133544921875, + "objective/train/value_reward_corr": -0.05902696949878982, + "objective/train/value_std": 0.011138916015625, + "objective/train/weight_avg": 0.9994536638259888, + "objective/train/weighted_lm_loss": 6.503223896026611, + "objective/train/weights_max": 1.0132840871810913, + "objective/train/weights_min": 0.9374246001243591, + "theoretical_loss": 4.907950205325841, + "tokens_seen": 81920000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000984911717495987, + "loss": 3.1288, + "theoretical_loss": 4.90016887620194, + "tokens_seen": 82837504 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.015008117072284222, + "objective/train/docs_used": 59310, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.6573896408081055, + "objective/train/original_loss": 6.657390594482422, + "objective/train/theoretical_loss": 4.894131960088744, + "objective/train/tokens_used": 104018400, + "objective/train/value_avg": -0.0240631103515625, + "objective/train/value_loss": 0.0021330935414880514, + "objective/train/value_max": -0.005512237548828125, + "objective/train/value_min": -0.190673828125, + "objective/train/value_reward_corr": -0.05691925749125899, + "objective/train/value_std": 0.0171966552734375, + "objective/train/weight_avg": 1.0015113353729248, + "objective/train/weighted_lm_loss": 6.66836404800415, + "objective/train/weights_max": 1.0183026790618896, + "objective/train/weights_min": 0.9511547684669495, + "theoretical_loss": 4.894131960088744, + "tokens_seen": 83558400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009845906902086678, + "loss": 3.1556, + "theoretical_loss": 4.891409903160486, + "tokens_seen": 83886080 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009842696629213483, + "loss": 3.2071, + "theoretical_loss": 4.882789964385566, + "tokens_seen": 84934656 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.013328813016414642, + "objective/train/docs_used": 60492, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.525941848754883, + "objective/train/original_loss": 6.525941848754883, + "objective/train/theoretical_loss": 4.880656245308686, + "objective/train/tokens_used": 105656800, + "objective/train/value_avg": -0.015411376953125, + "objective/train/value_loss": 0.011904042214155197, + "objective/train/value_max": -0.004329681396484375, + "objective/train/value_min": -0.055511474609375, + "objective/train/value_reward_corr": 0.027159267789206833, + "objective/train/value_std": 0.008148193359375, + "objective/train/weight_avg": 0.9987253546714783, + "objective/train/weighted_lm_loss": 6.518299579620361, + "objective/train/weights_max": 1.0054843425750732, + "objective/train/weights_min": 0.9083477854728699, + "theoretical_loss": 4.880656245308686, + "tokens_seen": 85196800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009839486356340288, + "loss": 3.1069, + "theoretical_loss": 4.874305176883285, + "tokens_seen": 85983232 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.018874622881412506, + "objective/train/docs_used": 61005, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.927553653717041, + "objective/train/original_loss": 5.927552700042725, + "objective/train/theoretical_loss": 4.867508223862666, + "objective/train/tokens_used": 107295200, + "objective/train/value_avg": -0.0292205810546875, + "objective/train/value_loss": 0.003877033479511738, + "objective/train/value_max": -0.005428314208984375, + "objective/train/value_min": -0.14599609375, + "objective/train/value_reward_corr": -0.0012490328079153463, + "objective/train/value_std": 0.021453857421875, + "objective/train/weight_avg": 1.0019066333770752, + "objective/train/weighted_lm_loss": 5.938565254211426, + "objective/train/weights_max": 1.0145999193191528, + "objective/train/weights_min": 0.9064410924911499, + "theoretical_loss": 4.867508223862666, + "tokens_seen": 86835200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009836276083467095, + "loss": 3.1224, + "theoretical_loss": 4.865951811782555, + "tokens_seen": 87031808 + }, + { + "epoch": 0.03, + "learning_rate": 0.00098330658105939, + "loss": 3.1293, + "theoretical_loss": 4.857726286448001, + "tokens_seen": 88080384 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.009257897734642029, + "objective/train/docs_used": 61597, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.57275915145874, + "objective/train/original_loss": 6.572758197784424, + "objective/train/theoretical_loss": 4.854673965977539, + "objective/train/tokens_used": 108933600, + "objective/train/value_avg": -0.01739501953125, + "objective/train/value_loss": 0.00167740264441818, + "objective/train/value_max": -0.004299163818359375, + "objective/train/value_min": -0.07781982421875, + "objective/train/value_reward_corr": 0.021943718006954174, + "objective/train/value_std": 0.00894927978515625, + "objective/train/weight_avg": 1.0009340047836304, + "objective/train/weighted_lm_loss": 6.579258918762207, + "objective/train/weights_max": 1.0075697898864746, + "objective/train/weights_min": 0.9314679503440857, + "theoretical_loss": 4.854673965977539, + "tokens_seen": 88473600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009829855537720708, + "loss": 3.0824, + "theoretical_loss": 4.849625157084915, + "tokens_seen": 89128960 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.015534603968262672, + "objective/train/docs_used": 63098, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.224563121795654, + "objective/train/original_loss": 6.224562168121338, + "objective/train/theoretical_loss": 4.8421403781486365, + "objective/train/tokens_used": 110572000, + "objective/train/value_avg": -0.0253448486328125, + "objective/train/value_loss": 0.0048468029126524925, + "objective/train/value_max": -0.0043487548828125, + "objective/train/value_min": -0.319580078125, + "objective/train/value_reward_corr": -0.03739744093010177, + "objective/train/value_std": 0.04534912109375, + "objective/train/weight_avg": 1.001577615737915, + "objective/train/weighted_lm_loss": 6.231696605682373, + "objective/train/weights_max": 1.0323060750961304, + "objective/train/weights_min": 0.914068341255188, + "theoretical_loss": 4.8421403781486365, + "tokens_seen": 90112000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009826645264847513, + "loss": 3.0773, + "theoretical_loss": 4.8416451118001484, + "tokens_seen": 90177536 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009823434991974318, + "loss": 3.0738, + "theoretical_loss": 4.833782964085925, + "tokens_seen": 91226112 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.012817732989788055, + "objective/train/docs_used": 63840, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.004798889160156, + "objective/train/original_loss": 6.0047993659973145, + "objective/train/theoretical_loss": 4.829895138804573, + "objective/train/tokens_used": 112210400, + "objective/train/value_avg": -0.0216217041015625, + "objective/train/value_loss": 0.003999830689281225, + "objective/train/value_max": -0.00582122802734375, + "objective/train/value_min": -0.10797119140625, + "objective/train/value_reward_corr": 0.020043408442549766, + "objective/train/value_std": 0.01104736328125, + "objective/train/weight_avg": 1.0013014078140259, + "objective/train/weighted_lm_loss": 6.013364315032959, + "objective/train/weights_max": 1.0107766389846802, + "objective/train/weights_min": 0.9092950820922852, + "theoretical_loss": 4.829895138804573, + "tokens_seen": 91750400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009820224719101125, + "loss": 3.0564, + "theoretical_loss": 4.826035646696238, + "tokens_seen": 92274688 + }, + { + "epoch": 0.03, + "learning_rate": 0.000981701444622793, + "loss": 3.0699, + "theoretical_loss": 4.8184002058880395, + "tokens_seen": 93323264 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.00809411983937025, + "objective/train/docs_used": 65224, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.181565284729004, + "objective/train/original_loss": 6.181565284729004, + "objective/train/theoretical_loss": 4.817926639970137, + "objective/train/tokens_used": 113848800, + "objective/train/value_avg": -0.019073486328125, + "objective/train/value_loss": 0.003721760818734765, + "objective/train/value_max": -0.0050201416015625, + "objective/train/value_min": -0.137939453125, + "objective/train/value_reward_corr": -0.028494184997504935, + "objective/train/value_std": 0.0124359130859375, + "objective/train/weight_avg": 1.000827670097351, + "objective/train/weighted_lm_loss": 6.187198162078857, + "objective/train/weights_max": 1.0137414932250977, + "objective/train/weights_min": 0.9092653393745422, + "theoretical_loss": 4.817926639970137, + "tokens_seen": 93388800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009813804173354735, + "loss": 3.0469, + "theoretical_loss": 4.810873796001641, + "tokens_seen": 94371840 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.022054284811019897, + "objective/train/docs_used": 65494, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.058526992797852, + "objective/train/original_loss": 6.058526992797852, + "objective/train/theoretical_loss": 4.80622393427368, + "objective/train/tokens_used": 115487200, + "objective/train/value_avg": -0.022979736328125, + "objective/train/value_loss": 0.021160762757062912, + "objective/train/value_max": -0.004772186279296875, + "objective/train/value_min": -0.1378173828125, + "objective/train/value_reward_corr": 0.23534788008524066, + "objective/train/value_std": 0.0184783935546875, + "objective/train/weight_avg": 0.9978983402252197, + "objective/train/weighted_lm_loss": 6.043734550476074, + "objective/train/weights_max": 1.0138123035430908, + "objective/train/weights_min": 0.9070895314216614, + "theoretical_loss": 4.80622393427368, + "tokens_seen": 95027200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009810593900481542, + "loss": 3.1057, + "theoretical_loss": 4.803453674356847, + "tokens_seen": 95420416 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009807383627608347, + "loss": 3.0511, + "theoretical_loss": 4.796137196443157, + "tokens_seen": 96468992 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.0004238911496941, + "objective/train/docs_used": 66055, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.411444187164307, + "objective/train/original_loss": 6.411444187164307, + "objective/train/theoretical_loss": 4.7947766867265464, + "objective/train/tokens_used": 117125600, + "objective/train/value_avg": -0.018646240234375, + "objective/train/value_loss": 0.0030906551983207464, + "objective/train/value_max": -0.004665374755859375, + "objective/train/value_min": -0.18359375, + "objective/train/value_reward_corr": 0.1367514558067734, + "objective/train/value_std": 0.0135498046875, + "objective/train/weight_avg": 0.9999729990959167, + "objective/train/weighted_lm_loss": 6.410627841949463, + "objective/train/weights_max": 1.0176327228546143, + "objective/train/weights_min": 0.9411357045173645, + "theoretical_loss": 4.7947766867265464, + "tokens_seen": 96665600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009804173354735152, + "loss": 3.0471, + "theoretical_loss": 4.788921811384128, + "tokens_seen": 97517568 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.00773673364892602, + "objective/train/docs_used": 66835, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.6667256355285645, + "objective/train/original_loss": 6.666726112365723, + "objective/train/theoretical_loss": 4.783575130772016, + "objective/train/tokens_used": 118764000, + "objective/train/value_avg": -0.02044677734375, + "objective/train/value_loss": 0.006503256969153881, + "objective/train/value_max": -0.004718780517578125, + "objective/train/value_min": -0.15966796875, + "objective/train/value_reward_corr": -0.004886830138846855, + "objective/train/value_std": 0.01641845703125, + "objective/train/weight_avg": 1.0008054971694946, + "objective/train/weighted_lm_loss": 6.6725568771362305, + "objective/train/weights_max": 1.0160291194915771, + "objective/train/weights_min": 0.906470537185669, + "theoretical_loss": 4.783575130772016, + "tokens_seen": 98304000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000980096308186196, + "loss": 3.0442, + "theoretical_loss": 4.781805057657483, + "tokens_seen": 98566144 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009797752808988764, + "loss": 3.0627, + "theoretical_loss": 4.774784559054009, + "tokens_seen": 99614720 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.0025967254769057035, + "objective/train/docs_used": 67525, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.232954502105713, + "objective/train/original_loss": 6.232954025268555, + "objective/train/theoretical_loss": 4.7726100281615205, + "objective/train/tokens_used": 120402400, + "objective/train/value_avg": -0.021881103515625, + "objective/train/value_loss": 0.0071530393324792385, + "objective/train/value_max": -0.00490570068359375, + "objective/train/value_min": -0.147216796875, + "objective/train/value_reward_corr": 0.07266847315003754, + "objective/train/value_std": 0.015838623046875, + "objective/train/weight_avg": 1.000294804573059, + "objective/train/weighted_lm_loss": 6.235024929046631, + "objective/train/weights_max": 1.014775037765503, + "objective/train/weights_min": 0.9122503399848938, + "theoretical_loss": 4.7726100281615205, + "tokens_seen": 99942400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009794542536115571, + "loss": 3.025, + "theoretical_loss": 4.76785802085957, + "tokens_seen": 100663296 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.001150749740190804, + "objective/train/docs_used": 68617, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.177828788757324, + "objective/train/original_loss": 6.177828788757324, + "objective/train/theoretical_loss": 4.761872632268167, + "objective/train/tokens_used": 122040800, + "objective/train/value_avg": -0.0202484130859375, + "objective/train/value_loss": 0.011535365134477615, + "objective/train/value_max": -0.005039215087890625, + "objective/train/value_min": -0.1473388671875, + "objective/train/value_reward_corr": 0.019205373923562457, + "objective/train/value_std": 0.0150146484375, + "objective/train/weight_avg": 0.9999410510063171, + "objective/train/weighted_lm_loss": 6.177096366882324, + "objective/train/weights_max": 1.014765739440918, + "objective/train/weights_min": 0.9056907892227173, + "theoretical_loss": 4.761872632268167, + "tokens_seen": 101580800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009791332263242376, + "loss": 3.0459, + "theoretical_loss": 4.761023226245718, + "tokens_seen": 101711872 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009788121990369182, + "loss": 3.0307, + "theoretical_loss": 4.754278032855497, + "tokens_seen": 102760448 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.0007664329605177045, + "objective/train/docs_used": 69265, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.94935417175293, + "objective/train/original_loss": 5.949354648590088, + "objective/train/theoretical_loss": 4.751354654492923, + "objective/train/tokens_used": 123679200, + "objective/train/value_avg": -0.01800537109375, + "objective/train/value_loss": 0.004451883025467396, + "objective/train/value_max": -0.004215240478515625, + "objective/train/value_min": -0.1131591796875, + "objective/train/value_reward_corr": -0.08466715315737537, + "objective/train/value_std": 0.01316070556640625, + "objective/train/weight_avg": 1.000098705291748, + "objective/train/weighted_lm_loss": 5.952308654785156, + "objective/train/weights_max": 1.011305809020996, + "objective/train/weights_min": 0.926684558391571, + "theoretical_loss": 4.751354654492923, + "tokens_seen": 103219200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009784911717495989, + "loss": 3.0589, + "theoretical_loss": 4.747620369572012, + "tokens_seen": 103809024 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.003563003381714225, + "objective/train/docs_used": 70556, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.983399391174316, + "objective/train/original_loss": 5.983399391174316, + "objective/train/theoretical_loss": 4.741048233458233, + "objective/train/tokens_used": 125317600, + "objective/train/value_avg": -0.0203704833984375, + "objective/train/value_loss": 0.006391477771103382, + "objective/train/value_max": -0.00469970703125, + "objective/train/value_min": -0.150634765625, + "objective/train/value_reward_corr": 0.002306437368063083, + "objective/train/value_std": 0.015167236328125, + "objective/train/weight_avg": 1.0003876686096191, + "objective/train/weighted_lm_loss": 5.986017227172852, + "objective/train/weights_max": 1.0145875215530396, + "objective/train/weights_min": 0.9086394309997559, + "theoretical_loss": 4.741048233458233, + "tokens_seen": 104857600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009781701444622794, + "loss": 3.019, + "theoretical_loss": 4.741048233458233, + "tokens_seen": 104857600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009778491171749599, + "loss": 2.9991, + "theoretical_loss": 4.734559686857368, + "tokens_seen": 105906176 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.0009891411755234003, + "objective/train/docs_used": 70950, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.907049655914307, + "objective/train/original_loss": 5.907050132751465, + "objective/train/theoretical_loss": 4.73094590671824, + "objective/train/tokens_used": 126956000, + "objective/train/value_avg": -0.021209716796875, + "objective/train/value_loss": 0.0063508739694952965, + "objective/train/value_max": -0.00391387939453125, + "objective/train/value_min": -0.1317138671875, + "objective/train/value_reward_corr": -0.06355585143476727, + "objective/train/value_std": 0.0170440673828125, + "objective/train/weight_avg": 0.9999322891235352, + "objective/train/weighted_lm_loss": 5.907861232757568, + "objective/train/weights_max": 1.013114333152771, + "objective/train/weights_min": 0.908715009689331, + "theoretical_loss": 4.73094590671824, + "tokens_seen": 106496000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009775280898876404, + "loss": 3.0013, + "theoretical_loss": 4.728152854643862, + "tokens_seen": 106954752 + }, + { + "epoch": 0.03, + "learning_rate": 0.000977207062600321, + "loss": 2.9862, + "theoretical_loss": 4.721825921615813, + "tokens_seen": 108003328 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.017633585259318352, + "objective/train/docs_used": 72030, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.896335601806641, + "objective/train/original_loss": 5.896336078643799, + "objective/train/theoretical_loss": 4.721040584744811, + "objective/train/tokens_used": 128594400, + "objective/train/value_avg": -0.022491455078125, + "objective/train/value_loss": 0.001039152848534286, + "objective/train/value_max": -0.004215240478515625, + "objective/train/value_min": -0.150146484375, + "objective/train/value_reward_corr": 0.007357831201055393, + "objective/train/value_std": 0.017608642578125, + "objective/train/weight_avg": 1.0017684698104858, + "objective/train/weighted_lm_loss": 5.907839298248291, + "objective/train/weights_max": 1.0150327682495117, + "objective/train/weights_min": 0.9434331655502319, + "theoretical_loss": 4.721040584744811, + "tokens_seen": 108134400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009768860353130016, + "loss": 3.0017, + "theoretical_loss": 4.71557713002025, + "tokens_seen": 109051904 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.042317669838666916, + "objective/train/docs_used": 72698, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.717645168304443, + "objective/train/original_loss": 5.717645168304443, + "objective/train/theoretical_loss": 4.711325526974863, + "objective/train/tokens_used": 130232800, + "objective/train/value_avg": -0.02069091796875, + "objective/train/value_loss": 0.045439958572387695, + "objective/train/value_max": -0.003795623779296875, + "objective/train/value_min": -0.1309814453125, + "objective/train/value_reward_corr": -0.17833977477710816, + "objective/train/value_std": 0.01552581787109375, + "objective/train/weight_avg": 0.9959893822669983, + "objective/train/weighted_lm_loss": 5.697718143463135, + "objective/train/weights_max": 1.0131272077560425, + "objective/train/weights_min": 0.9107363224029541, + "theoretical_loss": 4.711325526974863, + "tokens_seen": 109772800 + }, + { + "epoch": 0.03, + "learning_rate": 0.000976565008025682, + "loss": 2.9677, + "theoretical_loss": 4.709404777203279, + "tokens_seen": 110100480 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009762439807383627, + "loss": 2.9838, + "theoretical_loss": 4.703307213377671, + "tokens_seen": 111149056 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.01150219514966011, + "objective/train/docs_used": 74080, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.902035236358643, + "objective/train/original_loss": 5.902036190032959, + "objective/train/theoretical_loss": 4.701794319727625, + "objective/train/tokens_used": 131871200, + "objective/train/value_avg": -0.0247955322265625, + "objective/train/value_loss": 0.00734122097492218, + "objective/train/value_max": -0.00426483154296875, + "objective/train/value_min": -0.1654052734375, + "objective/train/value_reward_corr": -0.0059065953626496834, + "objective/train/value_std": 0.02069091796875, + "objective/train/weight_avg": 1.0011860132217407, + "objective/train/weighted_lm_loss": 5.9109039306640625, + "objective/train/weights_max": 1.0165642499923706, + "objective/train/weights_min": 0.9068411588668823, + "theoretical_loss": 4.701794319727625, + "tokens_seen": 111411200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009759229534510433, + "loss": 2.9852, + "theoretical_loss": 4.697282839501012, + "tokens_seen": 112197632 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.004319401923567057, + "objective/train/docs_used": 74817, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.759556770324707, + "objective/train/original_loss": 5.759556770324707, + "objective/train/theoretical_loss": 4.69244085582069, + "objective/train/tokens_used": 133509600, + "objective/train/value_avg": -0.0188751220703125, + "objective/train/value_loss": 0.00831959955394268, + "objective/train/value_max": -0.003063201904296875, + "objective/train/value_min": -0.1614990234375, + "objective/train/value_reward_corr": -0.05141803728776396, + "objective/train/value_std": 0.0154266357421875, + "objective/train/weight_avg": 0.9996088743209839, + "objective/train/weighted_lm_loss": 5.759246349334717, + "objective/train/weights_max": 1.0161831378936768, + "objective/train/weights_min": 0.9088392853736877, + "theoretical_loss": 4.69244085582069, + "tokens_seen": 113049600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009756019261637239, + "loss": 2.9998, + "theoretical_loss": 4.691330105257913, + "tokens_seen": 113246208 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009752808988764044, + "loss": 2.9565, + "theoretical_loss": 4.685447507140298, + "tokens_seen": 114294784 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.0028987082187086344, + "objective/train/docs_used": 76169, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.471240043640137, + "objective/train/original_loss": 5.471240043640137, + "objective/train/theoretical_loss": 4.683259315731689, + "objective/train/tokens_used": 135148000, + "objective/train/value_avg": -0.01953125, + "objective/train/value_loss": 0.004212427884340286, + "objective/train/value_max": -0.003147125244140625, + "objective/train/value_min": -0.140869140625, + "objective/train/value_reward_corr": -0.17627355099628605, + "objective/train/value_std": 0.01541900634765625, + "objective/train/weight_avg": 0.9997311234474182, + "objective/train/weighted_lm_loss": 5.479485988616943, + "objective/train/weights_max": 1.0141255855560303, + "objective/train/weights_min": 0.976077139377594, + "theoretical_loss": 4.683259315731689, + "tokens_seen": 114688000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000974959871589085, + "loss": 2.9719, + "theoretical_loss": 4.679633586620149, + "tokens_seen": 115343360 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.0013318595010787249, + "objective/train/docs_used": 76931, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.117286682128906, + "objective/train/original_loss": 6.117286205291748, + "objective/train/theoretical_loss": 4.674244150168143, + "objective/train/tokens_used": 136786400, + "objective/train/value_avg": -0.024871826171875, + "objective/train/value_loss": 0.007552409544587135, + "objective/train/value_max": -0.0033626556396484375, + "objective/train/value_min": -0.185302734375, + "objective/train/value_reward_corr": 0.10751649943804009, + "objective/train/value_std": 0.020416259765625, + "objective/train/weight_avg": 0.9999039769172668, + "objective/train/weighted_lm_loss": 6.116036415100098, + "objective/train/weights_max": 1.0184671878814697, + "objective/train/weights_min": 0.9098932147026062, + "theoretical_loss": 4.674244150168143, + "tokens_seen": 116326400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009746388443017657, + "loss": 2.9235, + "theoretical_loss": 4.673886928409454, + "tokens_seen": 116391936 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009743178170144463, + "loss": 2.9685, + "theoretical_loss": 4.668206158802439, + "tokens_seen": 117440512 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.017277611419558525, + "objective/train/docs_used": 78354, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.367839813232422, + "objective/train/original_loss": 6.3678412437438965, + "objective/train/theoretical_loss": 4.665390063922041, + "objective/train/tokens_used": 138424800, + "objective/train/value_avg": -0.0247039794921875, + "objective/train/value_loss": 0.002898253034800291, + "objective/train/value_max": -0.003795623779296875, + "objective/train/value_min": -0.19189453125, + "objective/train/value_reward_corr": 0.006363994197481393, + "objective/train/value_std": 0.01849365234375, + "objective/train/weight_avg": 1.0017420053482056, + "objective/train/weighted_lm_loss": 6.380412578582764, + "objective/train/weights_max": 1.019312858581543, + "objective/train/weights_min": 0.9088823199272156, + "theoretical_loss": 4.665390063922041, + "tokens_seen": 117964800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009739967897271268, + "loss": 2.9334, + "theoretical_loss": 4.662589944095533, + "tokens_seen": 118489088 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009736757624398074, + "loss": 2.9505, + "theoretical_loss": 4.657036989080726, + "tokens_seen": 119537664 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.00648023746907711, + "objective/train/docs_used": 78905, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.57493257522583, + "objective/train/original_loss": 5.574932098388672, + "objective/train/theoretical_loss": 4.656692000898091, + "objective/train/tokens_used": 140063200, + "objective/train/value_avg": -0.0230255126953125, + "objective/train/value_loss": 0.005893743131309748, + "objective/train/value_max": -0.002704620361328125, + "objective/train/value_min": -0.1597900390625, + "objective/train/value_reward_corr": 0.005257582566450997, + "objective/train/value_std": 0.0187530517578125, + "objective/train/weight_avg": 1.0006769895553589, + "objective/train/weighted_lm_loss": 5.579159259796143, + "objective/train/weights_max": 1.0160187482833862, + "objective/train/weights_min": 0.9111464619636536, + "theoretical_loss": 4.656692000898091, + "tokens_seen": 119603200 + }, + { + "epoch": 0.04, + "learning_rate": 0.000973354735152488, + "loss": 2.9278, + "theoretical_loss": 4.651546035608336, + "tokens_seen": 120586240 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.010207677260041237, + "objective/train/docs_used": 79692, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.013095378875732, + "objective/train/original_loss": 6.013095378875732, + "objective/train/theoretical_loss": 4.648145130215498, + "objective/train/tokens_used": 141701600, + "objective/train/value_avg": -0.022796630859375, + "objective/train/value_loss": 0.005867982283234596, + "objective/train/value_max": -0.00274658203125, + "objective/train/value_min": -0.1324462890625, + "objective/train/value_reward_corr": 0.00724300118518583, + "objective/train/value_std": 0.0171966552734375, + "objective/train/weight_avg": 1.001049518585205, + "objective/train/weighted_lm_loss": 6.020602226257324, + "objective/train/weights_max": 1.0131207704544067, + "objective/train/weights_min": 0.9062929153442383, + "theoretical_loss": 4.648145130215498, + "tokens_seen": 121241600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009730337078651685, + "loss": 2.9271, + "theoretical_loss": 4.646115861215389, + "tokens_seen": 121634816 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009727126805778491, + "loss": 2.9001, + "theoretical_loss": 4.640745277816107, + "tokens_seen": 122683392 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.016915490850806236, + "objective/train/docs_used": 81051, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.920280456542969, + "objective/train/original_loss": 5.920281410217285, + "objective/train/theoretical_loss": 4.639744833292989, + "objective/train/tokens_used": 143340000, + "objective/train/value_avg": -0.0221710205078125, + "objective/train/value_loss": 0.0016403989866375923, + "objective/train/value_max": -0.0022335052490234375, + "objective/train/value_min": -0.20654296875, + "objective/train/value_reward_corr": -0.03267231025583579, + "objective/train/value_std": 0.017852783203125, + "objective/train/weight_avg": 1.001699686050415, + "objective/train/weighted_lm_loss": 5.931380271911621, + "objective/train/weights_max": 1.0208085775375366, + "objective/train/weights_min": 0.9073635935783386, + "theoretical_loss": 4.639744833292989, + "tokens_seen": 122880000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009723916532905297, + "loss": 2.9346, + "theoretical_loss": 4.635433130451148, + "tokens_seen": 123731968 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.013338838703930378, + "objective/train/docs_used": 81846, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.321855545043945, + "objective/train/original_loss": 6.3218560218811035, + "objective/train/theoretical_loss": 4.631486691835402, + "objective/train/tokens_used": 144978400, + "objective/train/value_avg": -0.023529052734375, + "objective/train/value_loss": 0.005262937396764755, + "objective/train/value_max": -0.0029354095458984375, + "objective/train/value_min": -0.2086181640625, + "objective/train/value_reward_corr": 0.005847603889331968, + "objective/train/value_std": 0.018341064453125, + "objective/train/weight_avg": 1.0013597011566162, + "objective/train/weighted_lm_loss": 6.331906318664551, + "objective/train/weights_max": 1.0210119485855103, + "objective/train/weights_min": 0.9201222062110901, + "theoretical_loss": 4.631486691835402, + "tokens_seen": 124518400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009720706260032103, + "loss": 2.9086, + "theoretical_loss": 4.630178296092535, + "tokens_seen": 124780544 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009717495987158908, + "loss": 2.9017, + "theoretical_loss": 4.624979682501314, + "tokens_seen": 125829120 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.011121395044028759, + "objective/train/docs_used": 83055, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.253778457641602, + "objective/train/original_loss": 6.253777980804443, + "objective/train/theoretical_loss": 4.623366476647994, + "objective/train/tokens_used": 146616800, + "objective/train/value_avg": -0.023590087890625, + "objective/train/value_loss": 0.005776470527052879, + "objective/train/value_max": -0.00206756591796875, + "objective/train/value_min": -0.322998046875, + "objective/train/value_reward_corr": -0.006745794809292466, + "objective/train/value_std": 0.0204925537109375, + "objective/train/weight_avg": 1.0011403560638428, + "objective/train/weighted_lm_loss": 6.262775421142578, + "objective/train/weights_max": 1.0327677726745605, + "objective/train/weights_min": 0.9062259793281555, + "theoretical_loss": 4.623366476647994, + "tokens_seen": 126156800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009714285714285714, + "loss": 2.9118, + "theoretical_loss": 4.619836227135212, + "tokens_seen": 126877696 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.007541462313383818, + "objective/train/docs_used": 83592, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.218684673309326, + "objective/train/original_loss": 6.218684196472168, + "objective/train/theoretical_loss": 4.615380137211477, + "objective/train/tokens_used": 148255200, + "objective/train/value_avg": -0.0208740234375, + "objective/train/value_loss": 0.0038114397320896387, + "objective/train/value_max": -0.0020198822021484375, + "objective/train/value_min": -0.2218017578125, + "objective/train/value_reward_corr": -0.03283921296151024, + "objective/train/value_std": 0.0194549560546875, + "objective/train/weight_avg": 1.0007730722427368, + "objective/train/weighted_lm_loss": 6.225551128387451, + "objective/train/weights_max": 1.0223711729049683, + "objective/train/weights_min": 0.9356747269630432, + "theoretical_loss": 4.615380137211477, + "tokens_seen": 127795200 + }, + { + "epoch": 0.04, + "learning_rate": 0.000971107544141252, + "loss": 2.8914, + "theoretical_loss": 4.6147468961037195, + "tokens_seen": 127926272 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009707865168539325, + "loss": 2.9025, + "theoretical_loss": 4.609710683168146, + "tokens_seen": 128974848 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.01735193468630314, + "objective/train/docs_used": 84814, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.805196762084961, + "objective/train/original_loss": 5.805197238922119, + "objective/train/theoretical_loss": 4.607523791957059, + "objective/train/tokens_used": 149893600, + "objective/train/value_avg": -0.023712158203125, + "objective/train/value_loss": 0.0028286706656217575, + "objective/train/value_max": -0.002010345458984375, + "objective/train/value_min": -0.35693359375, + "objective/train/value_reward_corr": -0.016638797462229342, + "objective/train/value_std": 0.0240020751953125, + "objective/train/weight_avg": 1.0017491579055786, + "objective/train/weighted_lm_loss": 5.8162841796875, + "objective/train/weights_max": 1.036261796951294, + "objective/train/weights_min": 0.9122956991195679, + "theoretical_loss": 4.607523791957059, + "tokens_seen": 129433600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009704654895666132, + "loss": 2.8423, + "theoretical_loss": 4.604726608784391, + "tokens_seen": 130023424 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.0019749728962779045, + "objective/train/docs_used": 85560, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.877983570098877, + "objective/train/original_loss": 5.877983093261719, + "objective/train/theoretical_loss": 4.599793719186264, + "objective/train/tokens_used": 151532000, + "objective/train/value_avg": -0.0196380615234375, + "objective/train/value_loss": 0.004427865147590637, + "objective/train/value_max": -0.0018968582153320312, + "objective/train/value_min": -0.1812744140625, + "objective/train/value_reward_corr": 0.02956650100763648, + "objective/train/value_std": 0.0178680419921875, + "objective/train/weight_avg": 1.0002191066741943, + "objective/train/weighted_lm_loss": 5.879475116729736, + "objective/train/weights_max": 1.0174425840377808, + "objective/train/weights_min": 0.9062412977218628, + "theoretical_loss": 4.599793719186264, + "tokens_seen": 131072000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009701444622792938, + "loss": 2.9014, + "theoretical_loss": 4.599793719186264, + "tokens_seen": 131072000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009698234349919744, + "loss": 2.8856, + "theoretical_loss": 4.594911085507325, + "tokens_seen": 132120576 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.0019100214121863246, + "objective/train/docs_used": 86181, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.843048572540283, + "objective/train/original_loss": 5.843048572540283, + "objective/train/theoretical_loss": 4.592186348585331, + "objective/train/tokens_used": 153170400, + "objective/train/value_avg": -0.0263214111328125, + "objective/train/value_loss": 0.011606492102146149, + "objective/train/value_max": -0.002323150634765625, + "objective/train/value_min": -0.235107421875, + "objective/train/value_reward_corr": 0.022812717022212197, + "objective/train/value_std": 0.0237884521484375, + "objective/train/weight_avg": 1.000247836112976, + "objective/train/weighted_lm_loss": 5.843640327453613, + "objective/train/weights_max": 1.0237185955047607, + "objective/train/weights_min": 0.9126391410827637, + "theoretical_loss": 4.592186348585331, + "tokens_seen": 132710400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009695024077046549, + "loss": 2.8765, + "theoretical_loss": 4.5900778029393425, + "tokens_seen": 133169152 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009691813804173355, + "loss": 2.8671, + "theoretical_loss": 4.585292989925557, + "tokens_seen": 134217728 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.0053337691351771355, + "objective/train/docs_used": 87535, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.9014763832092285, + "objective/train/original_loss": 5.901476860046387, + "objective/train/theoretical_loss": 4.584698253288433, + "objective/train/tokens_used": 154808800, + "objective/train/value_avg": -0.0202178955078125, + "objective/train/value_loss": 0.005244651343673468, + "objective/train/value_max": -0.001987457275390625, + "objective/train/value_min": -0.137939453125, + "objective/train/value_reward_corr": -0.059641540767675354, + "objective/train/value_std": 0.0169525146484375, + "objective/train/weight_avg": 1.0005592107772827, + "objective/train/weighted_lm_loss": 5.906383514404297, + "objective/train/weights_max": 1.0138224363327026, + "objective/train/weights_min": 0.942801296710968, + "theoretical_loss": 4.584698253288433, + "tokens_seen": 134348800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009688603531300161, + "loss": 2.8644, + "theoretical_loss": 4.580555787387068, + "tokens_seen": 135266304 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": -0.0034272039774805307, + "objective/train/docs_used": 88189, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.129852294921875, + "objective/train/original_loss": 5.129852771759033, + "objective/train/theoretical_loss": 4.577326142448035, + "objective/train/tokens_used": 156447200, + "objective/train/value_avg": -0.0179595947265625, + "objective/train/value_loss": 0.0027850917540490627, + "objective/train/value_max": -0.001720428466796875, + "objective/train/value_min": -0.244384765625, + "objective/train/value_reward_corr": 0.18414309768621753, + "objective/train/value_std": 0.015777587890625, + "objective/train/weight_avg": 0.9996711015701294, + "objective/train/weighted_lm_loss": 5.129601001739502, + "objective/train/weights_max": 1.0246531963348389, + "objective/train/weights_min": 0.9304332137107849, + "theoretical_loss": 4.577326142448035, + "tokens_seen": 135987200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009685393258426967, + "loss": 2.881, + "theoretical_loss": 4.575865357980726, + "tokens_seen": 136314880 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009682182985553772, + "loss": 2.8854, + "theoretical_loss": 4.571220885387013, + "tokens_seen": 137363456 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.011118283495306969, + "objective/train/docs_used": 88907, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.2938079833984375, + "objective/train/original_loss": 5.2938079833984375, + "objective/train/theoretical_loss": 4.570066854274275, + "objective/train/tokens_used": 158085600, + "objective/train/value_avg": -0.018798828125, + "objective/train/value_loss": 0.0028324818704277277, + "objective/train/value_max": -0.001926422119140625, + "objective/train/value_min": -0.1593017578125, + "objective/train/value_reward_corr": -0.022661362993536795, + "objective/train/value_std": 0.01465606689453125, + "objective/train/weight_avg": 1.001125693321228, + "objective/train/weighted_lm_loss": 5.300258636474609, + "objective/train/weights_max": 1.015927791595459, + "objective/train/weights_min": 0.9091329574584961, + "theoretical_loss": 4.570066854274275, + "tokens_seen": 137625600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009678972712680578, + "loss": 2.8669, + "theoretical_loss": 4.566621573626489, + "tokens_seen": 138412032 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.016580553725361824, + "objective/train/docs_used": 90119, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.607501983642578, + "objective/train/original_loss": 5.607501983642578, + "objective/train/theoretical_loss": 4.562917349508602, + "objective/train/tokens_used": 159724000, + "objective/train/value_avg": -0.021240234375, + "objective/train/value_loss": 0.0011706296354532242, + "objective/train/value_max": -0.0030040740966796875, + "objective/train/value_min": -0.2333984375, + "objective/train/value_reward_corr": -0.02812973255200934, + "objective/train/value_std": 0.01467132568359375, + "objective/train/weight_avg": 1.0016639232635498, + "objective/train/weighted_lm_loss": 5.618233680725098, + "objective/train/weights_max": 1.023522973060608, + "objective/train/weights_min": 0.9242558479309082, + "theoretical_loss": 4.562917349508602, + "tokens_seen": 139264000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009675762439807384, + "loss": 2.9086, + "theoretical_loss": 4.562066646403457, + "tokens_seen": 139460608 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009672552166934189, + "loss": 2.9287, + "theoretical_loss": 4.557555346475546, + "tokens_seen": 140509184 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.002196705434471369, + "objective/train/docs_used": 90792, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.268797397613525, + "objective/train/original_loss": 6.268797874450684, + "objective/train/theoretical_loss": 4.5558747052997965, + "objective/train/tokens_used": 161362400, + "objective/train/value_avg": -0.0188140869140625, + "objective/train/value_loss": 0.008854576386511326, + "objective/train/value_max": -0.00391387939453125, + "objective/train/value_min": -0.1678466796875, + "objective/train/value_reward_corr": 0.006488739926883352, + "objective/train/value_std": 0.01348114013671875, + "objective/train/weight_avg": 1.000262975692749, + "objective/train/weighted_lm_loss": 6.271207809448242, + "objective/train/weights_max": 1.0168086290359497, + "objective/train/weights_min": 0.9059775471687317, + "theoretical_loss": 4.5558747052997965, + "tokens_seen": 140902400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009669341894060996, + "loss": 2.8953, + "theoretical_loss": 4.553086935048029, + "tokens_seen": 141557760 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.018412074074149132, + "objective/train/docs_used": 92175, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.970637798309326, + "objective/train/original_loss": 5.970638275146484, + "objective/train/theoretical_loss": 4.548936109453223, + "objective/train/tokens_used": 163000800, + "objective/train/value_avg": -0.022857666015625, + "objective/train/value_loss": 0.0008758631302043796, + "objective/train/value_max": -0.002124786376953125, + "objective/train/value_min": -0.306640625, + "objective/train/value_reward_corr": -0.016083901552148158, + "objective/train/value_std": 0.0185699462890625, + "objective/train/weight_avg": 1.0018454790115356, + "objective/train/weighted_lm_loss": 5.981850624084473, + "objective/train/weights_max": 1.0310492515563965, + "objective/train/weights_min": 0.9863534569740295, + "theoretical_loss": 4.548936109453223, + "tokens_seen": 142540800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009666131621187802, + "loss": 2.9048, + "theoretical_loss": 4.5486606911917145, + "tokens_seen": 142606336 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009662921348314608, + "loss": 2.8598, + "theoretical_loss": 4.544275911283326, + "tokens_seen": 143654912 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.014845302328467369, + "objective/train/docs_used": 92798, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.288022994995117, + "objective/train/original_loss": 5.288022994995117, + "objective/train/theoretical_loss": 4.542098855026559, + "objective/train/tokens_used": 164639200, + "objective/train/value_avg": -0.0238494873046875, + "objective/train/value_loss": 0.0014993957011029124, + "objective/train/value_max": -0.0020751953125, + "objective/train/value_min": -0.41845703125, + "objective/train/value_reward_corr": 0.15179281577629566, + "objective/train/value_std": 0.0267181396484375, + "objective/train/weight_avg": 1.0014920234680176, + "objective/train/weighted_lm_loss": 5.296165943145752, + "objective/train/weights_max": 1.0426503419876099, + "objective/train/weights_min": 0.9575287103652954, + "theoretical_loss": 4.542098855026559, + "tokens_seen": 144179200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009659711075441413, + "loss": 2.8781, + "theoretical_loss": 4.539931908467359, + "tokens_seen": 144703488 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009656500802568219, + "loss": 2.8948, + "theoretical_loss": 4.53562801213843, + "tokens_seen": 145752064 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.0013610897585749626, + "objective/train/docs_used": 94036, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.023899078369141, + "objective/train/original_loss": 6.023899555206299, + "objective/train/theoretical_loss": 4.535360335247455, + "objective/train/tokens_used": 166277600, + "objective/train/value_avg": -0.018157958984375, + "objective/train/value_loss": 0.0026959851384162903, + "objective/train/value_max": -0.0021991729736328125, + "objective/train/value_min": -0.264404296875, + "objective/train/value_reward_corr": 0.011397827080305897, + "objective/train/value_std": 0.01300811767578125, + "objective/train/weight_avg": 1.0001494884490967, + "objective/train/weighted_lm_loss": 6.025206565856934, + "objective/train/weights_max": 1.0267384052276611, + "objective/train/weights_min": 0.9434385299682617, + "theoretical_loss": 4.535360335247455, + "tokens_seen": 145817600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009653290529695025, + "loss": 2.8704, + "theoretical_loss": 4.531363567443194, + "tokens_seen": 146800640 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.008318365551531315, + "objective/train/docs_used": 94726, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.829771041870117, + "objective/train/original_loss": 5.829771041870117, + "objective/train/theoretical_loss": 4.5287180387305765, + "objective/train/tokens_used": 167916000, + "objective/train/value_avg": -0.0213165283203125, + "objective/train/value_loss": 0.005823276471346617, + "objective/train/value_max": -0.0023975372314453125, + "objective/train/value_min": -0.293701171875, + "objective/train/value_reward_corr": 0.01501033850805037, + "objective/train/value_std": 0.021270751953125, + "objective/train/weight_avg": 1.000860333442688, + "objective/train/weighted_lm_loss": 5.834353923797607, + "objective/train/weights_max": 1.0287301540374756, + "objective/train/weights_min": 0.905689537525177, + "theoretical_loss": 4.5287180387305765, + "tokens_seen": 147456000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000965008025682183, + "loss": 2.8549, + "theoretical_loss": 4.527137934800969, + "tokens_seen": 147849216 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009646869983948636, + "loss": 2.8698, + "theoretical_loss": 4.522950489442225, + "tokens_seen": 148897792 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.011002630926668644, + "objective/train/docs_used": 95869, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.841072082519531, + "objective/train/original_loss": 5.841071605682373, + "objective/train/theoretical_loss": 4.522169544973266, + "objective/train/tokens_used": 169554400, + "objective/train/value_avg": -0.0172882080078125, + "objective/train/value_loss": 0.014208096079528332, + "objective/train/value_max": -0.0025806427001953125, + "objective/train/value_min": -0.1207275390625, + "objective/train/value_reward_corr": -0.03615030961500503, + "objective/train/value_std": 0.01369476318359375, + "objective/train/weight_avg": 0.9989691376686096, + "objective/train/weighted_lm_loss": 5.837697505950928, + "objective/train/weights_max": 1.0120877027511597, + "objective/train/weights_min": 0.9075621366500854, + "theoretical_loss": 4.522169544973266, + "tokens_seen": 149094400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009643659711075442, + "loss": 2.8565, + "theoretical_loss": 4.518800620964164, + "tokens_seen": 149946368 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.012030016630887985, + "objective/train/docs_used": 96442, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.311154842376709, + "objective/train/original_loss": 5.311154365539551, + "objective/train/theoretical_loss": 4.515712520110756, + "objective/train/tokens_used": 171192800, + "objective/train/value_avg": -0.01947021484375, + "objective/train/value_loss": 0.0015685056569054723, + "objective/train/value_max": -0.0025806427001953125, + "objective/train/value_min": -0.353759765625, + "objective/train/value_reward_corr": -0.08056230903210176, + "objective/train/value_std": 0.01525115966796875, + "objective/train/weight_avg": 1.0012108087539673, + "objective/train/weighted_lm_loss": 5.31884241104126, + "objective/train/weights_max": 1.03594172000885, + "objective/train/weights_min": 0.9121072292327881, + "theoretical_loss": 4.515712520110756, + "tokens_seen": 150732800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009640449438202248, + "loss": 2.8274, + "theoretical_loss": 4.514687732902628, + "tokens_seen": 150994944 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009637239165329053, + "loss": 2.8125, + "theoretical_loss": 4.510611242319637, + "tokens_seen": 152043520 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.011073105037212372, + "objective/train/docs_used": 97778, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.14996862411499, + "objective/train/original_loss": 6.149969577789307, + "objective/train/theoretical_loss": 4.509344712913311, + "objective/train/tokens_used": 172831200, + "objective/train/value_avg": -0.0186767578125, + "objective/train/value_loss": 0.0021257330663502216, + "objective/train/value_max": -0.0025310516357421875, + "objective/train/value_min": -0.280029296875, + "objective/train/value_reward_corr": -0.00958096618978242, + "objective/train/value_std": 0.0146636962890625, + "objective/train/weight_avg": 1.0011177062988281, + "objective/train/weighted_lm_loss": 6.158018112182617, + "objective/train/weights_max": 1.0283368825912476, + "objective/train/weights_min": 0.9072344303131104, + "theoretical_loss": 4.509344712913311, + "tokens_seen": 152371200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009634028892455859, + "loss": 2.8172, + "theoretical_loss": 4.506570579405888, + "tokens_seen": 153092096 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.008141989819705486, + "objective/train/docs_used": 98099, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.7406182289123535, + "objective/train/original_loss": 5.7406182289123535, + "objective/train/theoretical_loss": 4.503063951009098, + "objective/train/tokens_used": 174469600, + "objective/train/value_avg": -0.019775390625, + "objective/train/value_loss": 0.004934609867632389, + "objective/train/value_max": -0.00254058837890625, + "objective/train/value_min": -0.37060546875, + "objective/train/value_reward_corr": -0.031928330754367545, + "objective/train/value_std": 0.018798828125, + "objective/train/weight_avg": 1.0008383989334106, + "objective/train/weighted_lm_loss": 5.746491432189941, + "objective/train/weights_max": 1.0372792482376099, + "objective/train/weights_min": 0.9097643494606018, + "theoretical_loss": 4.503063951009098, + "tokens_seen": 154009600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009630818619582666, + "loss": 2.8188, + "theoretical_loss": 4.502565187097554, + "tokens_seen": 154140672 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009627608346709471, + "loss": 2.805, + "theoretical_loss": 4.498594520706801, + "tokens_seen": 155189248 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.0009928768267855048, + "objective/train/docs_used": 99496, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.978617191314697, + "objective/train/original_loss": 5.978617191314697, + "objective/train/theoretical_loss": 4.496868137317781, + "objective/train/tokens_used": 176108000, + "objective/train/value_avg": -0.020111083984375, + "objective/train/value_loss": 0.00784789677709341, + "objective/train/value_max": -0.002414703369140625, + "objective/train/value_min": -0.294921875, + "objective/train/value_reward_corr": 0.1565045894369238, + "objective/train/value_std": 0.017791748046875, + "objective/train/weight_avg": 1.0001378059387207, + "objective/train/weighted_lm_loss": 5.981656074523926, + "objective/train/weights_max": 1.0261452198028564, + "objective/train/weights_min": 0.9070854783058167, + "theoretical_loss": 4.496868137317781, + "tokens_seen": 155648000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009624398073836276, + "loss": 2.8559, + "theoretical_loss": 4.494658047565416, + "tokens_seen": 156237824 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.003592036198824644, + "objective/train/docs_used": 100000, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.545536518096924, + "objective/train/original_loss": 5.545536041259766, + "objective/train/theoretical_loss": 4.490755246681026, + "objective/train/tokens_used": 177746400, + "objective/train/value_avg": -0.0212860107421875, + "objective/train/value_loss": 0.004894082434475422, + "objective/train/value_max": -0.0025119781494140625, + "objective/train/value_min": -0.323486328125, + "objective/train/value_reward_corr": 0.097253681297709, + "objective/train/value_std": 0.018035888671875, + "objective/train/weight_avg": 0.9996649622917175, + "objective/train/weighted_lm_loss": 5.5423760414123535, + "objective/train/weights_max": 1.028020977973938, + "objective/train/weights_min": 0.9215055108070374, + "theoretical_loss": 4.490755246681026, + "tokens_seen": 157286400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009621187800963082, + "loss": 2.8304, + "theoretical_loss": 4.490755246681026, + "tokens_seen": 157286400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009617977528089888, + "loss": 2.8184, + "theoretical_loss": 4.48688560840535, + "tokens_seen": 158334976 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.012915777042508125, + "objective/train/docs_used": 101476, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.470151424407959, + "objective/train/original_loss": 5.470151424407959, + "objective/train/theoretical_loss": 4.484723322677097, + "objective/train/tokens_used": 179384800, + "objective/train/value_avg": -0.021209716796875, + "objective/train/value_loss": 0.002817628439515829, + "objective/train/value_max": -0.0027675628662109375, + "objective/train/value_min": -0.31591796875, + "objective/train/value_reward_corr": -0.011093467019479962, + "objective/train/value_std": 0.016357421875, + "objective/train/weight_avg": 1.0013054609298706, + "objective/train/weighted_lm_loss": 5.4784111976623535, + "objective/train/weights_max": 1.0320335626602173, + "objective/train/weights_min": 0.9112136960029602, + "theoretical_loss": 4.484723322677097, + "tokens_seen": 158924800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009614767255216693, + "loss": 2.8525, + "theoretical_loss": 4.483048634114016, + "tokens_seen": 159383552 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009611556982343499, + "loss": 2.8159, + "theoretical_loss": 4.479243835897444, + "tokens_seen": 160432128 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.0057806214317679405, + "objective/train/docs_used": 102054, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.710201263427734, + "objective/train/original_loss": 5.710200786590576, + "objective/train/theoretical_loss": 4.478770474607726, + "objective/train/tokens_used": 181023200, + "objective/train/value_avg": -0.0212554931640625, + "objective/train/value_loss": 0.006892765406519175, + "objective/train/value_max": -0.0021915435791015625, + "objective/train/value_min": -0.272216796875, + "objective/train/value_reward_corr": -0.0510059668883443, + "objective/train/value_std": 0.0215606689453125, + "objective/train/weight_avg": 0.999455988407135, + "objective/train/weighted_lm_loss": 5.708116054534912, + "objective/train/weights_max": 1.027523398399353, + "objective/train/weights_min": 0.9268876314163208, + "theoretical_loss": 4.478770474607726, + "tokens_seen": 160563200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009608346709470305, + "loss": 2.818, + "theoretical_loss": 4.475470736262361, + "tokens_seen": 161480704 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.005808263551443815, + "objective/train/docs_used": 102718, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.921733379364014, + "objective/train/original_loss": 5.9217329025268555, + "objective/train/theoretical_loss": 4.4728948746462684, + "objective/train/tokens_used": 182661600, + "objective/train/value_avg": -0.017913818359375, + "objective/train/value_loss": 0.006334791891276836, + "objective/train/value_max": -0.0022525787353515625, + "objective/train/value_min": -0.10858154296875, + "objective/train/value_reward_corr": -0.10815229619080374, + "objective/train/value_std": 0.012176513671875, + "objective/train/weight_avg": 0.9994502663612366, + "objective/train/weighted_lm_loss": 5.92132043838501, + "objective/train/weights_max": 1.010683536529541, + "objective/train/weights_min": 0.9074490070343018, + "theoretical_loss": 4.4728948746462684, + "tokens_seen": 162201600 + }, + { + "epoch": 0.05, + "learning_rate": 0.000960513643659711, + "loss": 2.7999, + "theoretical_loss": 4.471728867843497, + "tokens_seen": 162529280 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009601926163723916, + "loss": 2.797, + "theoretical_loss": 4.4680177731250765, + "tokens_seen": 163577856 + }, + { + "debugging/Self-BLEU-5": 0.47376287031010694, + "debugging/distinct-1-grams": 0.7718165351312889, + "debugging/distinct-2-grams": 0.9624982234606337, + "debugging/entropy-1-grams": 5.847463360327344, + "debugging/entropy-2-grams": 6.816946097439353, + "debugging/length": 477.7857142857143, + "debugging/num_segments": 14, + "debugging/raw_token_scores_avg": 0.02379879727959633, + "debugging/raw_token_scores_std": 0.13287928700447083, + "epoch": 0.05, + "objective/train/advantage_avg": -0.0032742186449468136, + "objective/train/docs_used": 104069, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.333648681640625, + "objective/train/original_loss": 5.333648204803467, + "objective/train/theoretical_loss": 4.467094755136979, + "objective/train/tokens_used": 184300000, + "objective/train/value_avg": -0.020538330078125, + "objective/train/value_loss": 0.017967160791158676, + "objective/train/value_max": -0.004230499267578125, + "objective/train/value_min": -0.1177978515625, + "objective/train/value_reward_corr": -0.043457692897788607, + "objective/train/value_std": 0.0121002197265625, + "objective/train/weight_avg": 0.9997598528862, + "objective/train/weighted_lm_loss": 5.332858562469482, + "objective/train/weights_max": 1.01178777217865, + "objective/train/weights_min": 0.9061261415481567, + "theoretical_loss": 4.467094755136979, + "tokens_seen": 163840000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009598715890850722, + "loss": 2.7833, + "theoretical_loss": 4.464337004171679, + "tokens_seen": 164626432 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.032475654035806656, + "objective/train/docs_used": 105300, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.76785135269165, + "objective/train/original_loss": 5.767850875854492, + "objective/train/theoretical_loss": 4.461368406036007, + "objective/train/tokens_used": 185938400, + "objective/train/value_avg": -0.015899658203125, + "objective/train/value_loss": 0.03742058575153351, + "objective/train/value_max": -0.00254058837890625, + "objective/train/value_min": -0.12249755859375, + "objective/train/value_reward_corr": -0.17204993304755758, + "objective/train/value_std": 0.01163482666015625, + "objective/train/weight_avg": 0.9969340562820435, + "objective/train/weighted_lm_loss": 5.749564170837402, + "objective/train/weights_max": 1.0087382793426514, + "objective/train/weights_min": 0.9074539542198181, + "theoretical_loss": 4.461368406036007, + "tokens_seen": 165478400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009595505617977528, + "loss": 2.7999, + "theoretical_loss": 4.460686122368132, + "tokens_seen": 165675008 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009592295345104333, + "loss": 2.8047, + "theoretical_loss": 4.457064698168051, + "tokens_seen": 166723584 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.010828500613570213, + "objective/train/docs_used": 106148, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.59659481048584, + "objective/train/original_loss": 4.596595287322998, + "objective/train/theoretical_loss": 4.455714172485305, + "objective/train/tokens_used": 187576800, + "objective/train/value_avg": -0.017181396484375, + "objective/train/value_loss": 0.002017111051827669, + "objective/train/value_max": -0.002452850341796875, + "objective/train/value_min": -0.1873779296875, + "objective/train/value_reward_corr": -0.040128454191962676, + "objective/train/value_std": 0.01110076904296875, + "objective/train/weight_avg": 1.0010926723480225, + "objective/train/weighted_lm_loss": 4.601603031158447, + "objective/train/weights_max": 1.0187095403671265, + "objective/train/weights_min": 0.9116122722625732, + "theoretical_loss": 4.455714172485305, + "tokens_seen": 167116800 + }, + { + "epoch": 0.05, + "learning_rate": 0.000958908507223114, + "loss": 2.8165, + "theoretical_loss": 4.453472310850701, + "tokens_seen": 167772160 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.002418158110231161, + "objective/train/docs_used": 106755, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.606968402862549, + "objective/train/original_loss": 5.606968402862549, + "objective/train/theoretical_loss": 4.450130452511366, + "objective/train/tokens_used": 189215200, + "objective/train/value_avg": -0.01806640625, + "objective/train/value_loss": 0.009480397216975689, + "objective/train/value_max": -0.002590179443359375, + "objective/train/value_min": -0.149658203125, + "objective/train/value_reward_corr": -0.09052659199200813, + "objective/train/value_std": 0.01297760009765625, + "objective/train/weight_avg": 1.0002880096435547, + "objective/train/weighted_lm_loss": 5.61273717880249, + "objective/train/weights_max": 1.0149734020233154, + "objective/train/weights_min": 0.9093750715255737, + "theoretical_loss": 4.450130452511366, + "tokens_seen": 168755200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009585874799357946, + "loss": 2.8009, + "theoretical_loss": 4.449908548285846, + "tokens_seen": 168820736 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009582664526484752, + "loss": 2.7892, + "theoretical_loss": 4.446373006706281, + "tokens_seen": 169869312 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.013412320986390114, + "objective/train/docs_used": 107830, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.480970859527588, + "objective/train/original_loss": 5.480970859527588, + "objective/train/theoretical_loss": 4.44461569484119, + "objective/train/tokens_used": 190853600, + "objective/train/value_avg": -0.01861572265625, + "objective/train/value_loss": 0.0023385928943753242, + "objective/train/value_max": -0.0027904510498046875, + "objective/train/value_min": -0.1331787109375, + "objective/train/value_reward_corr": 0.003107433324399046, + "objective/train/value_std": 0.0118408203125, + "objective/train/weight_avg": 1.0013526678085327, + "objective/train/weighted_lm_loss": 5.488888263702393, + "objective/train/weights_max": 1.0132873058319092, + "objective/train/weights_min": 0.9134127497673035, + "theoretical_loss": 4.44461569484119, + "tokens_seen": 170393600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009579454253611557, + "loss": 2.7792, + "theoretical_loss": 4.442865290487752, + "tokens_seen": 170917888 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009576243980738363, + "loss": 2.7993, + "theoretical_loss": 4.439385011935977, + "tokens_seen": 171966464 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.007773547898977995, + "objective/train/docs_used": 108471, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.654592037200928, + "objective/train/original_loss": 5.654592037200928, + "objective/train/theoretical_loss": 4.439168396828466, + "objective/train/tokens_used": 192492000, + "objective/train/value_avg": -0.01934814453125, + "objective/train/value_loss": 0.004770443309098482, + "objective/train/value_max": -0.0026721954345703125, + "objective/train/value_min": -0.1485595703125, + "objective/train/value_reward_corr": -0.009421590675269145, + "objective/train/value_std": 0.01335906982421875, + "objective/train/weight_avg": 1.0008007287979126, + "objective/train/weighted_lm_loss": 5.660194396972656, + "objective/train/weights_max": 1.0139912366867065, + "objective/train/weights_min": 0.9104447364807129, + "theoretical_loss": 4.439168396828466, + "tokens_seen": 172032000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009573033707865169, + "loss": 2.7549, + "theoretical_loss": 4.435931791080489, + "tokens_seen": 173015040 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.009209288284182549, + "objective/train/docs_used": 109645, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.462757587432861, + "objective/train/original_loss": 5.462756633758545, + "objective/train/theoretical_loss": 4.433787102483406, + "objective/train/tokens_used": 194130400, + "objective/train/value_avg": -0.0201416015625, + "objective/train/value_loss": 0.0028335729148238897, + "objective/train/value_max": -0.0026416778564453125, + "objective/train/value_min": -0.22021484375, + "objective/train/value_reward_corr": 0.06258672483794218, + "objective/train/value_std": 0.0184783935546875, + "objective/train/weight_avg": 1.0009349584579468, + "objective/train/weighted_lm_loss": 5.467906951904297, + "objective/train/weights_max": 1.020638108253479, + "objective/train/weights_min": 0.9092498421669006, + "theoretical_loss": 4.433787102483406, + "tokens_seen": 173670400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009569823434991974, + "loss": 2.7811, + "theoretical_loss": 4.43250525547506, + "tokens_seen": 174063616 + }, + { + "epoch": 0.05, + "learning_rate": 0.000956661316211878, + "loss": 2.7656, + "theoretical_loss": 4.429105040004445, + "tokens_seen": 175112192 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.0009191831923089921, + "objective/train/docs_used": 110166, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.118589878082275, + "objective/train/original_loss": 5.118589401245117, + "objective/train/theoretical_loss": 4.4284704006001325, + "objective/train/tokens_used": 195768800, + "objective/train/value_avg": -0.0160064697265625, + "objective/train/value_loss": 0.0027426353190094233, + "objective/train/value_max": -0.0027141571044921875, + "objective/train/value_min": -0.1927490234375, + "objective/train/value_reward_corr": -0.09258648273866459, + "objective/train/value_std": 0.01132965087890625, + "objective/train/weight_avg": 0.9999216794967651, + "objective/train/weighted_lm_loss": 5.1195197105407715, + "objective/train/weights_max": 1.0176140069961548, + "objective/train/weights_min": 0.9271290302276611, + "theoretical_loss": 4.4284704006001325, + "tokens_seen": 175308800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009563402889245586, + "loss": 2.7509, + "theoretical_loss": 4.4257307866972155, + "tokens_seen": 176160768 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.0050432393327355385, + "objective/train/docs_used": 110915, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.636519432067871, + "objective/train/original_loss": 5.6365203857421875, + "objective/train/theoretical_loss": 4.42321692297592, + "objective/train/tokens_used": 197407200, + "objective/train/value_avg": -0.0202789306640625, + "objective/train/value_loss": 0.008647514507174492, + "objective/train/value_max": -0.0030155181884765625, + "objective/train/value_min": -0.25341796875, + "objective/train/value_reward_corr": 0.07479986603231073, + "objective/train/value_std": 0.0154266357421875, + "objective/train/weight_avg": 0.9995380640029907, + "objective/train/weighted_lm_loss": 5.635501384735107, + "objective/train/weights_max": 1.0255699157714844, + "objective/train/weights_min": 0.9083216190338135, + "theoretical_loss": 4.42321692297592, + "tokens_seen": 176947200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009560192616372392, + "loss": 2.6889, + "theoretical_loss": 4.422382144544446, + "tokens_seen": 177209344 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009556982343499197, + "loss": 2.7599, + "theoretical_loss": 4.419058769324055, + "tokens_seen": 178257920 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.033805180341005325, + "objective/train/docs_used": 112413, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.5346808433532715, + "objective/train/original_loss": 5.53468132019043, + "objective/train/theoretical_loss": 4.418025342716991, + "objective/train/tokens_used": 199045600, + "objective/train/value_avg": -0.020294189453125, + "objective/train/value_loss": 0.04464875906705856, + "objective/train/value_max": -0.00399017333984375, + "objective/train/value_min": -0.2193603515625, + "objective/train/value_reward_corr": 0.01204339035327772, + "objective/train/value_std": 0.01390838623046875, + "objective/train/weight_avg": 0.9968361258506775, + "objective/train/weighted_lm_loss": 5.515910625457764, + "objective/train/weights_max": 1.02181077003479, + "objective/train/weights_min": 0.9057291150093079, + "theoretical_loss": 4.418025342716991, + "tokens_seen": 178585600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009553772070626003, + "loss": 2.7541, + "theoretical_loss": 4.415760323430568, + "tokens_seen": 179306496 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": -0.010524842888116837, + "objective/train/docs_used": 112982, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.126819133758545, + "objective/train/original_loss": 5.126818656921387, + "objective/train/theoretical_loss": 4.412894372625901, + "objective/train/tokens_used": 200684000, + "objective/train/value_avg": -0.017303466796875, + "objective/train/value_loss": 0.009243300184607506, + "objective/train/value_max": -0.0028667449951171875, + "objective/train/value_min": -0.29345703125, + "objective/train/value_reward_corr": -0.006537378093011969, + "objective/train/value_std": 0.014862060546875, + "objective/train/weight_avg": 0.998992919921875, + "objective/train/weighted_lm_loss": 5.120220184326172, + "objective/train/weights_max": 1.0297119617462158, + "objective/train/weights_min": 0.9215586185455322, + "theoretical_loss": 4.412894372625901, + "tokens_seen": 180224000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000955056179775281, + "loss": 2.7124, + "theoretical_loss": 4.412486475710132, + "tokens_seen": 180355072 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009547351524879615, + "loss": 2.7164, + "theoretical_loss": 4.409236901300563, + "tokens_seen": 181403648 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.010954967699944973, + "objective/train/docs_used": 114216, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.518641471862793, + "objective/train/original_loss": 5.518641948699951, + "objective/train/theoretical_loss": 4.407822763665887, + "objective/train/tokens_used": 202322400, + "objective/train/value_avg": -0.0197906494140625, + "objective/train/value_loss": 0.0024598489981144667, + "objective/train/value_max": -0.0029697418212890625, + "objective/train/value_min": -0.2186279296875, + "objective/train/value_reward_corr": -0.002430101719770805, + "objective/train/value_std": 0.0142974853515625, + "objective/train/weight_avg": 1.0011075735092163, + "objective/train/weighted_lm_loss": 5.526012420654297, + "objective/train/weights_max": 1.0219247341156006, + "objective/train/weights_min": 0.9158818125724792, + "theoretical_loss": 4.407822763665887, + "tokens_seen": 181862400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009544141252006421, + "loss": 2.7309, + "theoretical_loss": 4.406011281476267, + "tokens_seen": 182452224 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.009043906815350056, + "objective/train/docs_used": 114775, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.025790691375732, + "objective/train/original_loss": 5.025790691375732, + "objective/train/theoretical_loss": 4.40280930349784, + "objective/train/tokens_used": 203960800, + "objective/train/value_avg": -0.01708984375, + "objective/train/value_loss": 0.0029902004171162844, + "objective/train/value_max": -0.003429412841796875, + "objective/train/value_min": -0.19580078125, + "objective/train/value_reward_corr": 0.01326286202499834, + "objective/train/value_std": 0.01125335693359375, + "objective/train/weight_avg": 1.000919222831726, + "objective/train/weighted_lm_loss": 5.03108024597168, + "objective/train/weights_max": 1.019710659980774, + "objective/train/weights_min": 0.9064787030220032, + "theoretical_loss": 4.40280930349784, + "tokens_seen": 183500800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009540930979133227, + "loss": 2.7238, + "theoretical_loss": 4.40280930349784, + "tokens_seen": 183500800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009537720706260033, + "loss": 2.7462, + "theoretical_loss": 4.3996306604662, + "tokens_seen": 184549376 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": -0.004668205976486206, + "objective/train/docs_used": 115939, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.570876598358154, + "objective/train/original_loss": 5.570875644683838, + "objective/train/theoretical_loss": 4.397852815085862, + "objective/train/tokens_used": 205599200, + "objective/train/value_avg": -0.0209808349609375, + "objective/train/value_loss": 0.008310766890645027, + "objective/train/value_max": -0.0030155181884765625, + "objective/train/value_min": -0.2021484375, + "objective/train/value_reward_corr": 0.02087211105548473, + "objective/train/value_std": 0.0203857421875, + "objective/train/weight_avg": 0.9995738863945007, + "objective/train/weighted_lm_loss": 5.566670894622803, + "objective/train/weights_max": 1.0202983617782593, + "objective/train/weights_min": 0.9059286713600159, + "theoretical_loss": 4.397852815085862, + "tokens_seen": 185139200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009534510433386838, + "loss": 2.7085, + "theoretical_loss": 4.396475051181074, + "tokens_seen": 185597952 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009531300160513644, + "loss": 2.7696, + "theoretical_loss": 4.393342180003689, + "tokens_seen": 186646528 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.005866773426532745, + "objective/train/docs_used": 116562, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.20233154296875, + "objective/train/original_loss": 5.202331066131592, + "objective/train/theoretical_loss": 4.392952155367621, + "objective/train/tokens_used": 207237600, + "objective/train/value_avg": -0.01499176025390625, + "objective/train/value_loss": 0.003918274771422148, + "objective/train/value_max": -0.0029125213623046875, + "objective/train/value_min": -0.2156982421875, + "objective/train/value_reward_corr": -0.018746006775678385, + "objective/train/value_std": 0.0099945068359375, + "objective/train/weight_avg": 1.0006057024002075, + "objective/train/weighted_lm_loss": 5.206538677215576, + "objective/train/weights_max": 1.0215771198272705, + "objective/train/weights_min": 0.9057794809341431, + "theoretical_loss": 4.392952155367621, + "tokens_seen": 186777600 + }, + { + "epoch": 0.06, + "learning_rate": 0.000952808988764045, + "loss": 2.7516, + "theoretical_loss": 4.390231756723523, + "tokens_seen": 187695104 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.01094528753310442, + "objective/train/docs_used": 117858, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.615569591522217, + "objective/train/original_loss": 5.615569114685059, + "objective/train/theoretical_loss": 4.388106213985938, + "objective/train/tokens_used": 208876000, + "objective/train/value_avg": -0.0188446044921875, + "objective/train/value_loss": 0.0015754876658320427, + "objective/train/value_max": -0.0031108856201171875, + "objective/train/value_min": -0.26708984375, + "objective/train/value_reward_corr": -0.0417573697976252, + "objective/train/value_std": 0.017578125, + "objective/train/weight_avg": 1.001102328300476, + "objective/train/weighted_lm_loss": 5.622840881347656, + "objective/train/weights_max": 1.0269863605499268, + "objective/train/weights_min": 0.9529151916503906, + "theoretical_loss": 4.388106213985938, + "tokens_seen": 188416000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009524879614767255, + "loss": 2.7296, + "theoretical_loss": 4.387143496428978, + "tokens_seen": 188743680 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009521669341894061, + "loss": 2.7608, + "theoretical_loss": 4.384077119381821, + "tokens_seen": 189792256 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.011294134892523289, + "objective/train/docs_used": 118609, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.516242504119873, + "objective/train/original_loss": 5.516242504119873, + "objective/train/theoretical_loss": 4.383313912078293, + "objective/train/tokens_used": 210514400, + "objective/train/value_avg": -0.0168609619140625, + "objective/train/value_loss": 0.0007090694853104651, + "objective/train/value_max": -0.002834320068359375, + "objective/train/value_min": -0.12115478515625, + "objective/train/value_reward_corr": -0.029884166388881514, + "objective/train/value_std": 0.01110076904296875, + "objective/train/weight_avg": 1.0011329650878906, + "objective/train/weighted_lm_loss": 5.523349761962891, + "objective/train/weights_max": 1.0120632648468018, + "objective/train/weights_min": 0.9622323513031006, + "theoretical_loss": 4.383313912078293, + "tokens_seen": 190054400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009518459069020867, + "loss": 2.7527, + "theoretical_loss": 4.381032350895292, + "tokens_seen": 190840832 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0049858661368489265, + "objective/train/docs_used": 119339, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.849750995635986, + "objective/train/original_loss": 4.849750518798828, + "objective/train/theoretical_loss": 4.37857420112113, + "objective/train/tokens_used": 212152800, + "objective/train/value_avg": -0.015411376953125, + "objective/train/value_loss": 0.0012705379631370306, + "objective/train/value_max": -0.0030994415283203125, + "objective/train/value_min": -0.1776123046875, + "objective/train/value_reward_corr": 0.039637398644800995, + "objective/train/value_std": 0.00937652587890625, + "objective/train/weight_avg": 1.0005048513412476, + "objective/train/weighted_lm_loss": 4.854963779449463, + "objective/train/weights_max": 1.0177489519119263, + "objective/train/weights_min": 0.9379817843437195, + "theoretical_loss": 4.37857420112113, + "tokens_seen": 191692800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009515248796147673, + "loss": 2.7427, + "theoretical_loss": 4.378008921215717, + "tokens_seen": 191889408 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009512038523274478, + "loss": 2.7703, + "theoretical_loss": 4.375006565407541, + "tokens_seen": 192937984 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.008535378612577915, + "objective/train/docs_used": 120055, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.2026190757751465, + "objective/train/original_loss": 6.202618598937988, + "objective/train/theoretical_loss": 4.373886061826036, + "objective/train/tokens_used": 213791200, + "objective/train/value_avg": -0.019500732421875, + "objective/train/value_loss": 0.001922689494676888, + "objective/train/value_max": -0.0029582977294921875, + "objective/train/value_min": -0.11676025390625, + "objective/train/value_reward_corr": 0.06235634992029809, + "objective/train/value_std": 0.011077880859375, + "objective/train/weight_avg": 1.0008630752563477, + "objective/train/weighted_lm_loss": 6.208310604095459, + "objective/train/weights_max": 1.0090868473052979, + "objective/train/weights_min": 0.9477912783622742, + "theoretical_loss": 4.373886061826036, + "tokens_seen": 193331200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009508828250401285, + "loss": 2.7794, + "theoretical_loss": 4.372025023241637, + "tokens_seen": 193986560 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.009271165356040001, + "objective/train/docs_used": 121265, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.654363632202148, + "objective/train/original_loss": 5.654363632202148, + "objective/train/theoretical_loss": 4.369248503085039, + "objective/train/tokens_used": 215429600, + "objective/train/value_avg": -0.0174407958984375, + "objective/train/value_loss": 0.0025286474265158176, + "objective/train/value_max": -0.0025806427001953125, + "objective/train/value_min": -0.1700439453125, + "objective/train/value_reward_corr": -0.008456394684927555, + "objective/train/value_std": 0.01171875, + "objective/train/weight_avg": 1.0009397268295288, + "objective/train/weighted_lm_loss": 5.65994119644165, + "objective/train/weights_max": 1.0170485973358154, + "objective/train/weights_min": 0.9216285943984985, + "theoretical_loss": 4.369248503085039, + "tokens_seen": 194969600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009505617977528091, + "loss": 2.7674, + "theoretical_loss": 4.3690640390867985, + "tokens_seen": 195035136 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009502407704654897, + "loss": 2.7732, + "theoretical_loss": 4.366123361804301, + "tokens_seen": 196083712 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.004541816655546427, + "objective/train/docs_used": 121711, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.728897571563721, + "objective/train/original_loss": 5.728898048400879, + "objective/train/theoretical_loss": 4.364660560962464, + "objective/train/tokens_used": 217068000, + "objective/train/value_avg": -0.01934814453125, + "objective/train/value_loss": 0.008198734372854233, + "objective/train/value_max": -0.0030269622802734375, + "objective/train/value_min": -0.126220703125, + "objective/train/value_reward_corr": 0.005732466692822846, + "objective/train/value_std": 0.01239776611328125, + "objective/train/weight_avg": 1.0004942417144775, + "objective/train/weighted_lm_loss": 5.732331275939941, + "objective/train/weights_max": 1.012599229812622, + "objective/train/weights_min": 0.9079968929290771, + "theoretical_loss": 4.364660560962464, + "tokens_seen": 196608000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009499197431781702, + "loss": 2.7307, + "theoretical_loss": 4.363202744645427, + "tokens_seen": 197132288 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009495987158908508, + "loss": 2.7494, + "theoretical_loss": 4.360301945151863, + "tokens_seen": 198180864 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.005229955539107323, + "objective/train/docs_used": 122248, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.548094272613525, + "objective/train/original_loss": 5.548094272613525, + "objective/train/theoretical_loss": 4.360121297730904, + "objective/train/tokens_used": 218706400, + "objective/train/value_avg": -0.01837158203125, + "objective/train/value_loss": 0.0021343128755688667, + "objective/train/value_max": -0.004100799560546875, + "objective/train/value_min": -0.1475830078125, + "objective/train/value_reward_corr": 0.019341521906218857, + "objective/train/value_std": 0.01239776611328125, + "objective/train/weight_avg": 1.0005335807800293, + "objective/train/weighted_lm_loss": 5.551665306091309, + "objective/train/weights_max": 1.0144551992416382, + "objective/train/weights_min": 0.9468933939933777, + "theoretical_loss": 4.360121297730904, + "tokens_seen": 198246400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009492776886035314, + "loss": 2.7017, + "theoretical_loss": 4.357420725058867, + "tokens_seen": 199229440 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.014815264381468296, + "objective/train/docs_used": 123553, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.386078357696533, + "objective/train/original_loss": 5.386077404022217, + "objective/train/theoretical_loss": 4.355629800949043, + "objective/train/tokens_used": 220344800, + "objective/train/value_avg": -0.0182342529296875, + "objective/train/value_loss": 0.0011109965853393078, + "objective/train/value_max": -0.0029697418212890625, + "objective/train/value_min": -0.151123046875, + "objective/train/value_reward_corr": 0.003961836697309472, + "objective/train/value_std": 0.01381683349609375, + "objective/train/weight_avg": 1.0014870166778564, + "objective/train/weighted_lm_loss": 5.394365310668945, + "objective/train/weights_max": 1.015122413635254, + "objective/train/weights_min": 0.9186387658119202, + "theoretical_loss": 4.355629800949043, + "tokens_seen": 199884800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009489566613162119, + "loss": 2.7042, + "theoretical_loss": 4.354558850201118, + "tokens_seen": 200278016 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009486356340288925, + "loss": 2.7387, + "theoretical_loss": 4.351716090421165, + "tokens_seen": 201326592 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": -0.06334809213876724, + "objective/train/docs_used": 124253, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.290553569793701, + "objective/train/original_loss": 5.290553092956543, + "objective/train/theoretical_loss": 4.351185182579177, + "objective/train/tokens_used": 221983200, + "objective/train/value_avg": -0.0165252685546875, + "objective/train/value_loss": 0.055041175335645676, + "objective/train/value_max": -0.0035800933837890625, + "objective/train/value_min": -0.1566162109375, + "objective/train/value_reward_corr": -0.06369074746238335, + "objective/train/value_std": 0.0098114013671875, + "objective/train/weight_avg": 0.9939332008361816, + "objective/train/weighted_lm_loss": 5.271691799163818, + "objective/train/weights_max": 1.011613368988037, + "objective/train/weights_min": 0.9062157869338989, + "theoretical_loss": 4.351185182579177, + "tokens_seen": 201523200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009483146067415731, + "loss": 2.734, + "theoretical_loss": 4.348892219480378, + "tokens_seen": 202375168 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.009640741162002087, + "objective/train/docs_used": 125838, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.487208366394043, + "objective/train/original_loss": 5.487208366394043, + "objective/train/theoretical_loss": 4.3467865781424315, + "objective/train/tokens_used": 223621600, + "objective/train/value_avg": -0.0192718505859375, + "objective/train/value_loss": 0.0033801456447690725, + "objective/train/value_max": -0.0029926300048828125, + "objective/train/value_min": -0.127685546875, + "objective/train/value_reward_corr": 0.006243336261065472, + "objective/train/value_std": 0.01184844970703125, + "objective/train/weight_avg": 1.0009806156158447, + "objective/train/weighted_lm_loss": 5.493793964385986, + "objective/train/weights_max": 1.012785792350769, + "objective/train/weights_min": 0.9107123017311096, + "theoretical_loss": 4.3467865781424315, + "tokens_seen": 203161600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009479935794542537, + "loss": 2.6818, + "theoretical_loss": 4.346087014972328, + "tokens_seen": 203423744 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009476725521669341, + "loss": 2.6707, + "theoretical_loss": 4.343300258238523, + "tokens_seen": 204472320 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": -0.0002033850905718282, + "objective/train/docs_used": 126453, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.34235143661499, + "objective/train/original_loss": 5.342350482940674, + "objective/train/theoretical_loss": 4.342433145909755, + "objective/train/tokens_used": 225260000, + "objective/train/value_avg": -0.0159759521484375, + "objective/train/value_loss": 0.006266888231039047, + "objective/train/value_max": -0.0030517578125, + "objective/train/value_min": -0.11456298828125, + "objective/train/value_reward_corr": 0.02369450348230506, + "objective/train/value_std": 0.0101318359375, + "objective/train/weight_avg": 1.000010371208191, + "objective/train/weighted_lm_loss": 5.343844413757324, + "objective/train/weights_max": 1.011457920074463, + "objective/train/weights_min": 0.9064819812774658, + "theoretical_loss": 4.342433145909755, + "tokens_seen": 204800000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009473515248796147, + "loss": 2.6824, + "theoretical_loss": 4.34053173428641, + "tokens_seen": 205520896 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": -0.006954561918973923, + "objective/train/docs_used": 127848, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.012685775756836, + "objective/train/original_loss": 5.012686729431152, + "objective/train/theoretical_loss": 4.33812406612692, + "objective/train/tokens_used": 226898400, + "objective/train/value_avg": -0.0188140869140625, + "objective/train/value_loss": 0.01265235710889101, + "objective/train/value_max": -0.003635406494140625, + "objective/train/value_min": -0.1492919921875, + "objective/train/value_reward_corr": -0.01662154598903637, + "objective/train/value_std": 0.01323699951171875, + "objective/train/weight_avg": 0.9993664026260376, + "objective/train/weighted_lm_loss": 5.008593559265137, + "objective/train/weights_max": 1.0149849653244019, + "objective/train/weights_min": 0.9074967503547668, + "theoretical_loss": 4.33812406612692, + "tokens_seen": 206438400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009470304975922953, + "loss": 2.693, + "theoretical_loss": 4.337781231709587, + "tokens_seen": 206569472 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009467094703049759, + "loss": 2.6811, + "theoretical_loss": 4.3350485426101395, + "tokens_seen": 207618048 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.00698684761300683, + "objective/train/docs_used": 128338, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.860535621643066, + "objective/train/original_loss": 4.860536575317383, + "objective/train/theoretical_loss": 4.333858540271834, + "objective/train/tokens_used": 228536800, + "objective/train/value_avg": -0.0185699462890625, + "objective/train/value_loss": 0.0008876342326402664, + "objective/train/value_max": -0.0028896331787109375, + "objective/train/value_min": -0.1842041015625, + "objective/train/value_reward_corr": -0.08067359948891911, + "objective/train/value_std": 0.01497650146484375, + "objective/train/weight_avg": 1.0007030963897705, + "objective/train/weighted_lm_loss": 4.864603519439697, + "objective/train/weights_max": 1.0178604125976562, + "objective/train/weights_min": 0.97736656665802, + "theoretical_loss": 4.333858540271834, + "tokens_seen": 208076800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009463884430176565, + "loss": 2.6729, + "theoretical_loss": 4.332333462523044, + "tokens_seen": 208666624 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.01151793822646141, + "objective/train/docs_used": 129483, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.982391357421875, + "objective/train/original_loss": 4.982390880584717, + "objective/train/theoretical_loss": 4.3296357903425715, + "objective/train/tokens_used": 230175200, + "objective/train/value_avg": -0.0183868408203125, + "objective/train/value_loss": 0.0027273804880678654, + "objective/train/value_max": -0.0034961700439453125, + "objective/train/value_min": -0.187255859375, + "objective/train/value_reward_corr": 0.02984524867917985, + "objective/train/value_std": 0.01157379150390625, + "objective/train/weight_avg": 1.0011651515960693, + "objective/train/weighted_lm_loss": 4.988529205322266, + "objective/train/weights_max": 1.0187970399856567, + "objective/train/weights_min": 0.9069280028343201, + "theoretical_loss": 4.3296357903425715, + "tokens_seen": 209715200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009460674157303371, + "loss": 2.6694, + "theoretical_loss": 4.3296357903425715, + "tokens_seen": 209715200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009457463884430177, + "loss": 2.6923, + "theoretical_loss": 4.326955328250631, + "tokens_seen": 210763776 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.006293178535997868, + "objective/train/docs_used": 130263, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.640300273895264, + "objective/train/original_loss": 5.640300273895264, + "objective/train/theoretical_loss": 4.325455058174634, + "objective/train/tokens_used": 231813600, + "objective/train/value_avg": -0.017730712890625, + "objective/train/value_loss": 0.00163785379845649, + "objective/train/value_max": -0.003063201904296875, + "objective/train/value_min": -0.188720703125, + "objective/train/value_reward_corr": 0.046772876029546606, + "objective/train/value_std": 0.0114593505859375, + "objective/train/weight_avg": 1.000637412071228, + "objective/train/weighted_lm_loss": 5.645237922668457, + "objective/train/weights_max": 1.0189878940582275, + "objective/train/weights_min": 0.9498488903045654, + "theoretical_loss": 4.325455058174634, + "tokens_seen": 211353600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009454253611556982, + "loss": 2.6991, + "theoretical_loss": 4.324291881646978, + "tokens_seen": 211812352 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009451043338683788, + "loss": 2.7188, + "theoretical_loss": 4.321645259081256, + "tokens_seen": 212860928 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": -0.006505795754492283, + "objective/train/docs_used": 131630, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.778900623321533, + "objective/train/original_loss": 5.778900623321533, + "objective/train/theoretical_loss": 4.321315604786012, + "objective/train/tokens_used": 233452000, + "objective/train/value_avg": -0.01727294921875, + "objective/train/value_loss": 0.0066181691363453865, + "objective/train/value_max": -0.0029582977294921875, + "objective/train/value_min": -0.2451171875, + "objective/train/value_reward_corr": -0.02943643253264096, + "objective/train/value_std": 0.01436614990234375, + "objective/train/weight_avg": 0.9993821382522583, + "objective/train/weighted_lm_loss": 5.776949405670166, + "objective/train/weights_max": 1.0247461795806885, + "objective/train/weights_min": 0.9223341345787048, + "theoretical_loss": 4.321315604786012, + "tokens_seen": 212992000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009447833065810594, + "loss": 2.6701, + "theoretical_loss": 4.3190152721867925, + "tokens_seen": 213909504 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.004049733746796846, + "objective/train/docs_used": 132228, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.899166584014893, + "objective/train/original_loss": 5.899166107177734, + "objective/train/theoretical_loss": 4.317216709748722, + "objective/train/tokens_used": 235090400, + "objective/train/value_avg": -0.0203704833984375, + "objective/train/value_loss": 0.009923080913722515, + "objective/train/value_max": -0.0032863616943359375, + "objective/train/value_min": -0.18994140625, + "objective/train/value_reward_corr": -0.0012145317241601347, + "objective/train/value_std": 0.01367950439453125, + "objective/train/weight_avg": 1.0004534721374512, + "objective/train/weighted_lm_loss": 5.9028544425964355, + "objective/train/weights_max": 1.0191115140914917, + "objective/train/weights_min": 0.905919075012207, + "theoretical_loss": 4.317216709748722, + "tokens_seen": 214630400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009444622792937399, + "loss": 2.7091, + "theoretical_loss": 4.3164017356160995, + "tokens_seen": 214958080 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009441412520064205, + "loss": 2.6704, + "theoretical_loss": 4.313804466978039, + "tokens_seen": 216006656 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": -0.01473494153469801, + "objective/train/docs_used": 133529, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.430199146270752, + "objective/train/original_loss": 5.430198669433594, + "objective/train/theoretical_loss": 4.313157670585552, + "objective/train/tokens_used": 236728800, + "objective/train/value_avg": -0.0195159912109375, + "objective/train/value_loss": 0.007753440644592047, + "objective/train/value_max": -0.003147125244140625, + "objective/train/value_min": -0.1649169921875, + "objective/train/value_reward_corr": 0.19047934977585018, + "objective/train/value_std": 0.0129241943359375, + "objective/train/weight_avg": 0.9985648393630981, + "objective/train/weighted_lm_loss": 5.421947479248047, + "objective/train/weights_max": 1.0162074565887451, + "objective/train/weights_min": 0.9340464472770691, + "theoretical_loss": 4.313157670585552, + "tokens_seen": 216268800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009438202247191011, + "loss": 2.673, + "theoretical_loss": 4.311223286776586, + "tokens_seen": 217055232 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": -0.0068270303308963776, + "objective/train/docs_used": 134228, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.763496398925781, + "objective/train/original_loss": 5.763495445251465, + "objective/train/theoretical_loss": 4.309137802190812, + "objective/train/tokens_used": 238367200, + "objective/train/value_avg": -0.021270751953125, + "objective/train/value_loss": 0.007056256756186485, + "objective/train/value_max": -0.003223419189453125, + "objective/train/value_min": -0.17626953125, + "objective/train/value_reward_corr": 0.15295256351051723, + "objective/train/value_std": 0.014373779296875, + "objective/train/weight_avg": 0.9993520379066467, + "objective/train/weighted_lm_loss": 5.758028984069824, + "objective/train/weights_max": 1.0162217617034912, + "objective/train/weights_min": 0.9235442280769348, + "theoretical_loss": 4.309137802190812, + "tokens_seen": 217907200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009434991974317817, + "loss": 2.6668, + "theoretical_loss": 4.3086580183511565, + "tokens_seen": 218103808 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009431781701444622, + "loss": 2.6669, + "theoretical_loss": 4.306108487818438, + "tokens_seen": 219152384 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.013936464674770832, + "objective/train/docs_used": 135676, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.5125532150268555, + "objective/train/original_loss": 5.512553691864014, + "objective/train/theoretical_loss": 4.305156436273988, + "objective/train/tokens_used": 240005600, + "objective/train/value_avg": -0.0184783935546875, + "objective/train/value_loss": 0.0007793743279762566, + "objective/train/value_max": -0.00307464599609375, + "objective/train/value_min": -0.1689453125, + "objective/train/value_reward_corr": 0.01536073343718132, + "objective/train/value_std": 0.01299285888671875, + "objective/train/weight_avg": 1.0013974905014038, + "objective/train/weighted_lm_loss": 5.520252704620361, + "objective/train/weights_max": 1.0169731378555298, + "objective/train/weights_min": 0.9549798965454102, + "theoretical_loss": 4.305156436273988, + "tokens_seen": 219545600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009428571428571429, + "loss": 2.6707, + "theoretical_loss": 4.3035745240156915, + "tokens_seen": 220200960 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.012835163623094559, + "objective/train/docs_used": 136371, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 6.019532203674316, + "objective/train/original_loss": 6.019532203674316, + "objective/train/theoretical_loss": 4.3012129208251935, + "objective/train/tokens_used": 241644000, + "objective/train/value_avg": -0.016845703125, + "objective/train/value_loss": 0.0010000005131587386, + "objective/train/value_max": -0.0028228759765625, + "objective/train/value_min": -0.1588134765625, + "objective/train/value_reward_corr": 0.004276335114376484, + "objective/train/value_std": 0.011505126953125, + "objective/train/weight_avg": 1.0012885332107544, + "objective/train/weighted_lm_loss": 6.027918338775635, + "objective/train/weights_max": 1.015946626663208, + "objective/train/weights_min": 0.9531607031822205, + "theoretical_loss": 4.3012129208251935, + "tokens_seen": 221184000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009425361155698235, + "loss": 2.7029, + "theoretical_loss": 4.301055958445467, + "tokens_seen": 221249536 + }, + { + "epoch": 0.07, + "learning_rate": 0.000942215088282504, + "loss": 2.6412, + "theoretical_loss": 4.2985526252217054, + "tokens_seen": 222298112 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.01144590973854065, + "objective/train/docs_used": 137650, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.541260242462158, + "objective/train/original_loss": 5.541261196136475, + "objective/train/theoretical_loss": 4.297306619601446, + "objective/train/tokens_used": 243282400, + "objective/train/value_avg": -0.0175323486328125, + "objective/train/value_loss": 0.0018446120666339993, + "objective/train/value_max": -0.0028553009033203125, + "objective/train/value_min": -0.2076416015625, + "objective/train/value_reward_corr": -0.022123123071625703, + "objective/train/value_std": 0.01090240478515625, + "objective/train/weight_avg": 1.001153588294983, + "objective/train/weighted_lm_loss": 5.548229694366455, + "objective/train/weights_max": 1.0207871198654175, + "objective/train/weights_min": 0.9161787629127502, + "theoretical_loss": 4.297306619601446, + "tokens_seen": 222822400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009418940609951846, + "loss": 2.6828, + "theoretical_loss": 4.296064361017181, + "tokens_seen": 223346688 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009415730337078652, + "loss": 2.6794, + "theoretical_loss": 4.293591005012228, + "tokens_seen": 224395264 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.007739283610135317, + "objective/train/docs_used": 138458, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.349739074707031, + "objective/train/original_loss": 5.349740028381348, + "objective/train/theoretical_loss": 4.29343691163279, + "objective/train/tokens_used": 244920800, + "objective/train/value_avg": -0.017578125, + "objective/train/value_loss": 0.002496065804734826, + "objective/train/value_max": -0.0034427642822265625, + "objective/train/value_min": -0.1444091796875, + "objective/train/value_reward_corr": 0.003156878699630653, + "objective/train/value_std": 0.01105499267578125, + "objective/train/weight_avg": 1.0007860660552979, + "objective/train/weighted_lm_loss": 5.354698181152344, + "objective/train/weights_max": 1.0144718885421753, + "objective/train/weights_min": 0.9124878644943237, + "theoretical_loss": 4.29343691163279, + "tokens_seen": 224460800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009412520064205458, + "loss": 2.6463, + "theoretical_loss": 4.291132398844749, + "tokens_seen": 225443840 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.005272321868687868, + "objective/train/docs_used": 139641, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.739506721496582, + "objective/train/original_loss": 5.739508152008057, + "objective/train/theoretical_loss": 4.289603190747359, + "objective/train/tokens_used": 246559200, + "objective/train/value_avg": -0.01555633544921875, + "objective/train/value_loss": 0.003915421199053526, + "objective/train/value_max": -0.00307464599609375, + "objective/train/value_min": -0.09588623046875, + "objective/train/value_reward_corr": 0.00948548841245023, + "objective/train/value_std": 0.00972747802734375, + "objective/train/weight_avg": 1.0005464553833008, + "objective/train/weighted_lm_loss": 5.74354362487793, + "objective/train/weights_max": 1.009427785873413, + "objective/train/weights_min": 0.922980010509491, + "theoretical_loss": 4.289603190747359, + "tokens_seen": 226099200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009409309791332263, + "loss": 2.6844, + "theoretical_loss": 4.2886883865614305, + "tokens_seen": 226492416 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009406099518459069, + "loss": 2.7018, + "theoretical_loss": 4.286258814570154, + "tokens_seen": 227540992 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0032209716737270355, + "objective/train/docs_used": 140376, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.564426898956299, + "objective/train/original_loss": 5.564426898956299, + "objective/train/theoretical_loss": 4.2858048651145335, + "objective/train/tokens_used": 248197600, + "objective/train/value_avg": -0.01551055908203125, + "objective/train/value_loss": 0.004238619469106197, + "objective/train/value_max": -0.0036067962646484375, + "objective/train/value_min": -0.138671875, + "objective/train/value_reward_corr": -0.008118835810016295, + "objective/train/value_std": 0.009674072265625, + "objective/train/weight_avg": 1.0003429651260376, + "objective/train/weighted_lm_loss": 5.567164421081543, + "objective/train/weights_max": 1.0138750076293945, + "objective/train/weights_min": 0.9070465564727783, + "theoretical_loss": 4.2858048651145335, + "tokens_seen": 227737600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009402889245585875, + "loss": 2.7041, + "theoretical_loss": 4.283843531593567, + "tokens_seen": 228589568 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": -0.006069748662412167, + "objective/train/docs_used": 141693, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.237890720367432, + "objective/train/original_loss": 5.237890243530273, + "objective/train/theoretical_loss": 4.282041356805376, + "objective/train/tokens_used": 249836000, + "objective/train/value_avg": -0.016021728515625, + "objective/train/value_loss": 0.009178967215120792, + "objective/train/value_max": -0.00323486328125, + "objective/train/value_min": -0.1534423828125, + "objective/train/value_reward_corr": -0.03061892565898365, + "objective/train/value_std": 0.0091552734375, + "objective/train/weight_avg": 0.9994378089904785, + "objective/train/weighted_lm_loss": 5.239619731903076, + "objective/train/weights_max": 1.015385627746582, + "objective/train/weights_min": 0.9170610904693604, + "theoretical_loss": 4.282041356805376, + "tokens_seen": 229376000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009399678972712681, + "loss": 2.6847, + "theoretical_loss": 4.281442388623764, + "tokens_seen": 229638144 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009396468699839486, + "loss": 2.6411, + "theoretical_loss": 4.279055238878065, + "tokens_seen": 230686720 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.01201572548598051, + "objective/train/docs_used": 142407, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.8672027587890625, + "objective/train/original_loss": 5.867203712463379, + "objective/train/theoretical_loss": 4.278312101369555, + "objective/train/tokens_used": 251474400, + "objective/train/value_avg": -0.016998291015625, + "objective/train/value_loss": 0.00231100688688457, + "objective/train/value_max": -0.0028781890869140625, + "objective/train/value_min": -0.18505859375, + "objective/train/value_reward_corr": 0.015502911610877149, + "objective/train/value_std": 0.01230621337890625, + "objective/train/weight_avg": 1.0012128353118896, + "objective/train/weighted_lm_loss": 5.874997615814209, + "objective/train/weights_max": 1.0182081460952759, + "objective/train/weights_min": 0.9117130637168884, + "theoretical_loss": 4.278312101369555, + "tokens_seen": 231014400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009393258426966292, + "loss": 2.6304, + "theoretical_loss": 4.276681937755853, + "tokens_seen": 231735296 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.00904279574751854, + "objective/train/docs_used": 143093, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.456385135650635, + "objective/train/original_loss": 5.456386089324951, + "objective/train/theoretical_loss": 4.274616547428058, + "objective/train/tokens_used": 253112800, + "objective/train/value_avg": -0.0178375244140625, + "objective/train/value_loss": 0.0023171850480139256, + "objective/train/value_max": -0.0023975372314453125, + "objective/train/value_min": -0.150146484375, + "objective/train/value_reward_corr": 0.0032721938175134385, + "objective/train/value_std": 0.01287078857421875, + "objective/train/weight_avg": 1.0009156465530396, + "objective/train/weighted_lm_loss": 5.461907386779785, + "objective/train/weights_max": 1.0150251388549805, + "objective/train/weights_min": 0.9103960394859314, + "theoretical_loss": 4.274616547428058, + "tokens_seen": 232652800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009390048154093099, + "loss": 2.664, + "theoretical_loss": 4.274322342796429, + "tokens_seen": 232783872 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009386837881219904, + "loss": 2.642, + "theoretical_loss": 4.271976313637885, + "tokens_seen": 233832448 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.005932926200330257, + "objective/train/docs_used": 144323, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.713996410369873, + "objective/train/original_loss": 5.713995456695557, + "objective/train/theoretical_loss": 4.2709541562809665, + "objective/train/tokens_used": 254751200, + "objective/train/value_avg": -0.0169677734375, + "objective/train/value_loss": 0.0017901872051879764, + "objective/train/value_max": -0.0027675628662109375, + "objective/train/value_min": -0.1357421875, + "objective/train/value_reward_corr": -0.10462109851144669, + "objective/train/value_std": 0.010498046875, + "objective/train/weight_avg": 1.0006022453308105, + "objective/train/weighted_lm_loss": 5.719018936157227, + "objective/train/weights_max": 1.0136080980300903, + "objective/train/weights_min": 0.9702244997024536, + "theoretical_loss": 4.2709541562809665, + "tokens_seen": 234291200 + }, + { + "epoch": 0.07, + "learning_rate": 0.000938362760834671, + "loss": 2.6673, + "theoretical_loss": 4.269643711976926, + "tokens_seen": 234881024 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.004251271951943636, + "objective/train/docs_used": 145084, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.917726993560791, + "objective/train/original_loss": 4.917726993560791, + "objective/train/theoretical_loss": 4.267324401529657, + "objective/train/tokens_used": 256389600, + "objective/train/value_avg": -0.0168914794921875, + "objective/train/value_loss": 0.002068107482045889, + "objective/train/value_max": -0.0033893585205078125, + "objective/train/value_min": -0.156494140625, + "objective/train/value_reward_corr": -0.01782792161771601, + "objective/train/value_std": 0.00893402099609375, + "objective/train/weight_avg": 1.0004353523254395, + "objective/train/weighted_lm_loss": 4.921153545379639, + "objective/train/weights_max": 1.014171838760376, + "objective/train/weights_min": 0.951495885848999, + "theoretical_loss": 4.267324401529657, + "tokens_seen": 235929600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009380417335473516, + "loss": 2.6116, + "theoretical_loss": 4.267324401529657, + "tokens_seen": 235929600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009377207062600322, + "loss": 2.6348, + "theoretical_loss": 4.265018247993272, + "tokens_seen": 236978176 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.009723395109176636, + "objective/train/docs_used": 146518, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.177943706512451, + "objective/train/original_loss": 5.177943706512451, + "objective/train/theoretical_loss": 4.263726768712791, + "objective/train/tokens_used": 258028000, + "objective/train/value_avg": -0.0150299072265625, + "objective/train/value_loss": 0.00048504778533242643, + "objective/train/value_max": -0.0028896331787109375, + "objective/train/value_min": -0.173583984375, + "objective/train/value_reward_corr": -0.0913488105432693, + "objective/train/value_std": 0.0099334716796875, + "objective/train/weight_avg": 1.0009747743606567, + "objective/train/weighted_lm_loss": 5.183785438537598, + "objective/train/weights_max": 1.0173686742782593, + "objective/train/weights_min": 0.9750662446022034, + "theoretical_loss": 4.263726768712791, + "tokens_seen": 237568000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009373996789727127, + "loss": 2.6294, + "theoretical_loss": 4.262725119008646, + "tokens_seen": 238026752 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009370786516853933, + "loss": 2.6223, + "theoretical_loss": 4.260444884123785, + "tokens_seen": 239075328 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.012284423224627972, + "objective/train/docs_used": 147039, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.664731502532959, + "objective/train/original_loss": 5.664732456207275, + "objective/train/theoretical_loss": 4.260160754955504, + "objective/train/tokens_used": 259666400, + "objective/train/value_avg": -0.0187530517578125, + "objective/train/value_loss": 0.003126369556412101, + "objective/train/value_max": -0.0034427642822265625, + "objective/train/value_min": -0.174560546875, + "objective/train/value_reward_corr": -0.001074846509371838, + "objective/train/value_std": 0.01059722900390625, + "objective/train/weight_avg": 1.0012437105178833, + "objective/train/weighted_lm_loss": 5.672114372253418, + "objective/train/weights_max": 1.0175472497940063, + "objective/train/weights_min": 0.9098713994026184, + "theoretical_loss": 4.260160754955504, + "tokens_seen": 239206400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009367576243980739, + "loss": 2.6391, + "theoretical_loss": 4.258177414758135, + "tokens_seen": 240123904 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": -0.011190192773938179, + "objective/train/docs_used": 148118, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.982842922210693, + "objective/train/original_loss": 4.982841968536377, + "objective/train/theoretical_loss": 4.256625868631222, + "objective/train/tokens_used": 261304800, + "objective/train/value_avg": -0.0198974609375, + "objective/train/value_loss": 0.0099484296515584, + "objective/train/value_max": -0.0035648345947265625, + "objective/train/value_min": -0.1885986328125, + "objective/train/value_reward_corr": 0.0671826818392787, + "objective/train/value_std": 0.012176513671875, + "objective/train/weight_avg": 0.998929500579834, + "objective/train/weighted_lm_loss": 4.978255271911621, + "objective/train/weights_max": 1.0177881717681885, + "objective/train/weights_min": 0.9058239459991455, + "theoretical_loss": 4.256625868631222, + "tokens_seen": 240844800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009364365971107544, + "loss": 2.6919, + "theoretical_loss": 4.25592258416769, + "tokens_seen": 241172480 + }, + { + "epoch": 0.07, + "learning_rate": 0.000936115569823435, + "loss": 2.6261, + "theoretical_loss": 4.253680267410921, + "tokens_seen": 242221056 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.008330612443387508, + "objective/train/docs_used": 148826, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.625147342681885, + "objective/train/original_loss": 5.625147819519043, + "objective/train/theoretical_loss": 4.253121629035574, + "objective/train/tokens_used": 262943200, + "objective/train/value_avg": -0.0198974609375, + "objective/train/value_loss": 0.002775331260636449, + "objective/train/value_max": -0.0033626556396484375, + "objective/train/value_min": -0.153076171875, + "objective/train/value_reward_corr": 0.14139235308455608, + "objective/train/value_std": 0.0149078369140625, + "objective/train/weight_avg": 1.0008467435836792, + "objective/train/weighted_lm_loss": 5.629456996917725, + "objective/train/weights_max": 1.0153448581695557, + "objective/train/weights_min": 0.9266049861907959, + "theoretical_loss": 4.253121629035574, + "tokens_seen": 242483200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009357945425361156, + "loss": 2.6501, + "theoretical_loss": 4.251450341315464, + "tokens_seen": 243269632 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.004722317215055227, + "objective/train/docs_used": 150000, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.325284957885742, + "objective/train/original_loss": 5.325284004211426, + "objective/train/theoretical_loss": 4.249647566071895, + "objective/train/tokens_used": 264581600, + "objective/train/value_avg": -0.0162353515625, + "objective/train/value_loss": 0.0009544612257741392, + "objective/train/value_max": -0.00319671630859375, + "objective/train/value_min": -0.1302490234375, + "objective/train/value_reward_corr": -0.16371479345823173, + "objective/train/value_std": 0.01015472412109375, + "objective/train/weight_avg": 1.0004769563674927, + "objective/train/weighted_lm_loss": 5.328819274902344, + "objective/train/weights_max": 1.0130221843719482, + "objective/train/weights_min": 0.9791459441184998, + "theoretical_loss": 4.249647566071895, + "tokens_seen": 244121600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009354735152487962, + "loss": 2.5934, + "theoretical_loss": 4.249232684445579, + "tokens_seen": 244318208 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009351524879614767, + "loss": 2.61, + "theoretical_loss": 4.247027177070329, + "tokens_seen": 245366784 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.004646449349820614, + "objective/train/docs_used": 150657, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.6061625480651855, + "objective/train/original_loss": 5.6061625480651855, + "objective/train/theoretical_loss": 4.246203219947814, + "objective/train/tokens_used": 266220000, + "objective/train/value_avg": -0.020172119140625, + "objective/train/value_loss": 0.006878900341689587, + "objective/train/value_max": -0.0033893585205078125, + "objective/train/value_min": -0.1934814453125, + "objective/train/value_reward_corr": 0.013545096273944016, + "objective/train/value_std": 0.0130615234375, + "objective/train/weight_avg": 1.0004982948303223, + "objective/train/weighted_lm_loss": 5.6087822914123535, + "objective/train/weights_max": 1.0194107294082642, + "objective/train/weights_min": 0.9161314964294434, + "theoretical_loss": 4.246203219947814, + "tokens_seen": 245760000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009348314606741574, + "loss": 2.6243, + "theoretical_loss": 4.24483370113249, + "tokens_seen": 246415360 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0055094617418944836, + "objective/train/docs_used": 151862, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.409593105316162, + "objective/train/original_loss": 5.409592151641846, + "objective/train/theoretical_loss": 4.242788140882488, + "objective/train/tokens_used": 267858400, + "objective/train/value_avg": -0.0184326171875, + "objective/train/value_loss": 0.005143036134541035, + "objective/train/value_max": -0.0033512115478515625, + "objective/train/value_min": -0.139404296875, + "objective/train/value_reward_corr": 0.05321445513121375, + "objective/train/value_std": 0.01070404052734375, + "objective/train/weight_avg": 1.0005762577056885, + "objective/train/weighted_lm_loss": 5.4129462242126465, + "objective/train/weights_max": 1.0139672756195068, + "objective/train/weights_min": 0.906561553478241, + "theoretical_loss": 4.242788140882488, + "tokens_seen": 247398400 + }, + { + "epoch": 0.07, + "learning_rate": 0.000934510433386838, + "loss": 2.6068, + "theoretical_loss": 4.242652140218147, + "tokens_seen": 247463936 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009341894060995185, + "loss": 2.6238, + "theoretical_loss": 4.240482379526973, + "tokens_seen": 248512512 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.006646394729614258, + "objective/train/docs_used": 152583, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.567517280578613, + "objective/train/original_loss": 5.567517280578613, + "objective/train/theoretical_loss": 4.2394018888240215, + "objective/train/tokens_used": 269496800, + "objective/train/value_avg": -0.01776123046875, + "objective/train/value_loss": 0.004004355054348707, + "objective/train/value_max": -0.00323486328125, + "objective/train/value_min": -0.17529296875, + "objective/train/value_reward_corr": 0.017918612929269728, + "objective/train/value_std": 0.0113067626953125, + "objective/train/weight_avg": 1.0006842613220215, + "objective/train/weighted_lm_loss": 5.572047233581543, + "objective/train/weights_max": 1.0176080465316772, + "objective/train/weights_min": 0.9071496725082397, + "theoretical_loss": 4.2394018888240215, + "tokens_seen": 249036800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009338683788121991, + "loss": 2.6084, + "theoretical_loss": 4.2383243058431646, + "tokens_seen": 249561088 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009335473515248797, + "loss": 2.6309, + "theoretical_loss": 4.23617780750703, + "tokens_seen": 250609664 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": -0.011361167766153812, + "objective/train/docs_used": 153242, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.14518928527832, + "objective/train/original_loss": 5.145188808441162, + "objective/train/theoretical_loss": 4.236044033176665, + "objective/train/tokens_used": 271135200, + "objective/train/value_avg": -0.0186004638671875, + "objective/train/value_loss": 0.016557952389121056, + "objective/train/value_max": -0.003376007080078125, + "objective/train/value_min": -0.1605224609375, + "objective/train/value_reward_corr": 0.023904739900579244, + "objective/train/value_std": 0.010162353515625, + "objective/train/weight_avg": 0.9989446997642517, + "objective/train/weighted_lm_loss": 5.1369099617004395, + "objective/train/weights_max": 1.016081690788269, + "objective/train/weights_min": 0.9059036374092102, + "theoretical_loss": 4.236044033176665, + "tokens_seen": 250675200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009332263242375603, + "loss": 2.6248, + "theoretical_loss": 4.23404277438719, + "tokens_seen": 251658240 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.010963143780827522, + "objective/train/docs_used": 154565, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.42684268951416, + "objective/train/original_loss": 5.426843166351318, + "objective/train/theoretical_loss": 4.232714152537391, + "objective/train/tokens_used": 272773600, + "objective/train/value_avg": -0.0157470703125, + "objective/train/value_loss": 0.0006736197392456234, + "objective/train/value_max": -0.0030994415283203125, + "objective/train/value_min": -0.1441650390625, + "objective/train/value_reward_corr": 0.05411743440154548, + "objective/train/value_std": 0.0092010498046875, + "objective/train/weight_avg": 1.001099705696106, + "objective/train/weighted_lm_loss": 5.433028697967529, + "objective/train/weights_max": 1.0144418478012085, + "objective/train/weights_min": 0.9587525129318237, + "theoretical_loss": 4.232714152537391, + "tokens_seen": 252313600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009329052969502408, + "loss": 2.5923, + "theoretical_loss": 4.231919097853398, + "tokens_seen": 252706816 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009325842696629213, + "loss": 2.641, + "theoretical_loss": 4.2298066707499515, + "tokens_seen": 253755392 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": -0.0011873722542077303, + "objective/train/docs_used": 156106, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.658092021942139, + "objective/train/original_loss": 5.658091068267822, + "objective/train/theoretical_loss": 4.229411834441462, + "objective/train/tokens_used": 274412000, + "objective/train/value_avg": -0.0177001953125, + "objective/train/value_loss": 0.011797117069363594, + "objective/train/value_max": -0.0030879974365234375, + "objective/train/value_min": -0.1326904296875, + "objective/train/value_reward_corr": 0.002033641754256261, + "objective/train/value_std": 0.0106964111328125, + "objective/train/weight_avg": 0.9999385476112366, + "objective/train/weighted_lm_loss": 5.657343864440918, + "objective/train/weights_max": 1.0132980346679688, + "objective/train/weights_min": 0.9061436057090759, + "theoretical_loss": 4.229411834441462, + "tokens_seen": 253952000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009322632423756019, + "loss": 2.5887, + "theoretical_loss": 4.227705387369683, + "tokens_seen": 254803968 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.002829274395480752, + "objective/train/docs_used": 156630, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.00520658493042, + "objective/train/original_loss": 5.00520658493042, + "objective/train/theoretical_loss": 4.226136675116626, + "objective/train/tokens_used": 276050400, + "objective/train/value_avg": -0.020721435546875, + "objective/train/value_loss": 0.004884996917098761, + "objective/train/value_max": -0.00269317626953125, + "objective/train/value_min": -0.1761474609375, + "objective/train/value_reward_corr": 0.10623065465475957, + "objective/train/value_std": 0.0135040283203125, + "objective/train/weight_avg": 1.0003069639205933, + "objective/train/weighted_lm_loss": 5.007968902587891, + "objective/train/weights_max": 1.017707109451294, + "objective/train/weights_min": 0.9269225597381592, + "theoretical_loss": 4.226136675116626, + "tokens_seen": 255590400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009319422150882825, + "loss": 2.6108, + "theoretical_loss": 4.225615143428513, + "tokens_seen": 255852544 + }, + { + "epoch": 0.08, + "learning_rate": 0.000931621187800963, + "loss": 2.5856, + "theoretical_loss": 4.223535836040548, + "tokens_seen": 256901120 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": -0.013872098177671432, + "objective/train/docs_used": 157258, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.6423139572143555, + "objective/train/original_loss": 4.6423139572143555, + "objective/train/theoretical_loss": 4.2228882792456055, + "objective/train/tokens_used": 277688800, + "objective/train/value_avg": -0.0168304443359375, + "objective/train/value_loss": 0.01628779247403145, + "objective/train/value_max": -0.0030517578125, + "objective/train/value_min": -0.11517333984375, + "objective/train/value_reward_corr": 0.02544506417291046, + "objective/train/value_std": 0.00848388671875, + "objective/train/weight_avg": 0.9986923933029175, + "objective/train/weighted_lm_loss": 4.637303352355957, + "objective/train/weights_max": 1.00926673412323, + "objective/train/weights_min": 0.9063841104507446, + "theoretical_loss": 4.2228882792456055, + "tokens_seen": 257228800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009313001605136436, + "loss": 2.5514, + "theoretical_loss": 4.221467363693727, + "tokens_seen": 257949696 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": -0.0038066597189754248, + "objective/train/docs_used": 158510, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.060000896453857, + "objective/train/original_loss": 5.060000896453857, + "objective/train/theoretical_loss": 4.219666259736535, + "objective/train/tokens_used": 279327200, + "objective/train/value_avg": -0.0172882080078125, + "objective/train/value_loss": 0.007683118339627981, + "objective/train/value_max": -0.0032482147216796875, + "objective/train/value_min": -0.1390380859375, + "objective/train/value_reward_corr": 0.011934989865640013, + "objective/train/value_std": 0.00934600830078125, + "objective/train/weight_avg": 0.9996567964553833, + "objective/train/weighted_lm_loss": 5.059278964996338, + "objective/train/weights_max": 1.0126200914382935, + "objective/train/weights_min": 0.9058341383934021, + "theoretical_loss": 4.219666259736535, + "tokens_seen": 258867200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009309791332263243, + "loss": 2.5837, + "theoretical_loss": 4.219409626225975, + "tokens_seen": 258998272 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009306581059390048, + "loss": 2.5611, + "theoretical_loss": 4.217362524801874, + "tokens_seen": 260046848 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0026349734980612993, + "objective/train/docs_used": 159098, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.614067554473877, + "objective/train/original_loss": 4.614068031311035, + "objective/train/theoretical_loss": 4.216470237501046, + "objective/train/tokens_used": 280965600, + "objective/train/value_avg": -0.01922607421875, + "objective/train/value_loss": 0.002133879577741027, + "objective/train/value_max": -0.0035648345947265625, + "objective/train/value_min": -0.138916015625, + "objective/train/value_reward_corr": -0.026329480778683363, + "objective/train/value_std": 0.0111083984375, + "objective/train/weight_avg": 1.0002741813659668, + "objective/train/weighted_lm_loss": 4.618664741516113, + "objective/train/weights_max": 1.013927936553955, + "objective/train/weights_min": 0.9315813183784485, + "theoretical_loss": 4.216470237501046, + "tokens_seen": 260505600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009303370786516854, + "loss": 2.5504, + "theoretical_loss": 4.215325961889821, + "tokens_seen": 261095424 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.009819858707487583, + "objective/train/docs_used": 160106, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.034743309020996, + "objective/train/original_loss": 5.034743309020996, + "objective/train/theoretical_loss": 4.213299841239684, + "objective/train/tokens_used": 282604000, + "objective/train/value_avg": -0.01959228515625, + "objective/train/value_loss": 0.0017664137994870543, + "objective/train/value_max": -0.003185272216796875, + "objective/train/value_min": -0.234375, + "objective/train/value_reward_corr": 0.1314902989823912, + "objective/train/value_std": 0.0124969482421875, + "objective/train/weight_avg": 1.000990629196167, + "objective/train/weighted_lm_loss": 5.040003776550293, + "objective/train/weights_max": 1.0236414670944214, + "objective/train/weights_min": 0.9295957088470459, + "theoretical_loss": 4.213299841239684, + "tokens_seen": 262144000 + }, + { + "epoch": 0.08, + "learning_rate": 0.000930016051364366, + "loss": 2.6173, + "theoretical_loss": 4.213299841239684, + "tokens_seen": 262144000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009296950240770466, + "loss": 2.5631, + "theoretical_loss": 4.211284067860909, + "tokens_seen": 263192576 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": -0.006368339527398348, + "objective/train/docs_used": 160649, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.514211654663086, + "objective/train/original_loss": 4.514212131500244, + "objective/train/theoretical_loss": 4.210154707234386, + "objective/train/tokens_used": 284242400, + "objective/train/value_avg": -0.01617431640625, + "objective/train/value_loss": 0.007943103089928627, + "objective/train/value_max": -0.0034961700439453125, + "objective/train/value_min": -0.1939697265625, + "objective/train/value_reward_corr": -0.04417080495252218, + "objective/train/value_std": 0.010833740234375, + "objective/train/weight_avg": 0.9994020462036133, + "objective/train/weighted_lm_loss": 4.512711524963379, + "objective/train/weights_max": 1.0185158252716064, + "objective/train/weights_min": 0.9172833561897278, + "theoretical_loss": 4.210154707234386, + "tokens_seen": 263782400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009293739967897271, + "loss": 2.5623, + "theoretical_loss": 4.209278548001103, + "tokens_seen": 264241152 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009290529695024077, + "loss": 2.5717, + "theoretical_loss": 4.207283189125054, + "tokens_seen": 265289728 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.009766592644155025, + "objective/train/docs_used": 161970, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.469795227050781, + "objective/train/original_loss": 5.469795227050781, + "objective/train/theoretical_loss": 4.20703447914773, + "objective/train/tokens_used": 285880800, + "objective/train/value_avg": -0.0169219970703125, + "objective/train/value_loss": 0.0005903098499402404, + "objective/train/value_max": -0.00319671630859375, + "objective/train/value_min": -0.1588134765625, + "objective/train/value_reward_corr": 0.2518589386202416, + "objective/train/value_std": 0.010223388671875, + "objective/train/weight_avg": 1.0009795427322388, + "objective/train/weighted_lm_loss": 5.475329875946045, + "objective/train/weights_max": 1.0159482955932617, + "objective/train/weights_min": 0.9838430881500244, + "theoretical_loss": 4.20703447914773, + "tokens_seen": 265420800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009287319422150883, + "loss": 2.5935, + "theoretical_loss": 4.2052978998941954, + "tokens_seen": 266338304 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": -0.02547341398894787, + "objective/train/docs_used": 162606, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.154658317565918, + "objective/train/original_loss": 5.154658794403076, + "objective/train/theoretical_loss": 4.203938807828708, + "objective/train/tokens_used": 287519200, + "objective/train/value_avg": -0.0202178955078125, + "objective/train/value_loss": 0.012364314869046211, + "objective/train/value_max": -0.0031604766845703125, + "objective/train/value_min": -0.143798828125, + "objective/train/value_reward_corr": 0.16667290949736696, + "objective/train/value_std": 0.01328277587890625, + "objective/train/weight_avg": 0.9975135922431946, + "objective/train/weighted_lm_loss": 5.143342018127441, + "objective/train/weights_max": 1.0129393339157104, + "objective/train/weights_min": 0.9102391004562378, + "theoretical_loss": 4.203938807828708, + "tokens_seen": 267059200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009284109149277688, + "loss": 2.5641, + "theoretical_loss": 4.203322590146491, + "tokens_seen": 267386880 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009280898876404494, + "loss": 2.5626, + "theoretical_loss": 4.2013571708767365, + "tokens_seen": 268435456 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": -0.015820598229765892, + "objective/train/docs_used": 163909, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.558199405670166, + "objective/train/original_loss": 4.558199882507324, + "objective/train/theoretical_loss": 4.200867351124762, + "objective/train/tokens_used": 289157600, + "objective/train/value_avg": -0.0188140869140625, + "objective/train/value_loss": 0.01883486844599247, + "objective/train/value_max": -0.0038547515869140625, + "objective/train/value_min": -0.2486572265625, + "objective/train/value_reward_corr": 0.02051853412401225, + "objective/train/value_std": 0.01280975341796875, + "objective/train/weight_avg": 0.9985097050666809, + "objective/train/weighted_lm_loss": 4.549055099487305, + "objective/train/weights_max": 1.0247138738632202, + "objective/train/weights_min": 0.906940758228302, + "theoretical_loss": 4.200867351124762, + "tokens_seen": 268697600 + }, + { + "epoch": 0.08, + "learning_rate": 0.00092776886035313, + "loss": 2.5515, + "theoretical_loss": 4.199401554217266, + "tokens_seen": 269484032 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.005608405917882919, + "objective/train/docs_used": 164570, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.041329860687256, + "objective/train/original_loss": 5.041329383850098, + "objective/train/theoretical_loss": 4.197819773699849, + "objective/train/tokens_used": 290796000, + "objective/train/value_avg": -0.018707275390625, + "objective/train/value_loss": 0.002231367165222764, + "objective/train/value_max": -0.0034160614013671875, + "objective/train/value_min": -0.226806640625, + "objective/train/value_reward_corr": 0.055289106270642756, + "objective/train/value_std": 0.0127410888671875, + "objective/train/weight_avg": 1.000571846961975, + "objective/train/weighted_lm_loss": 5.045241832733154, + "objective/train/weights_max": 1.0228710174560547, + "objective/train/weights_min": 0.9099597334861755, + "theoretical_loss": 4.197819773699849, + "tokens_seen": 270336000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009274478330658106, + "loss": 2.5633, + "theoretical_loss": 4.19745565341906, + "tokens_seen": 270532608 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009271268057784911, + "loss": 2.5824, + "theoretical_loss": 4.195519382833226, + "tokens_seen": 271581184 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.004173563793301582, + "objective/train/docs_used": 165961, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.959630489349365, + "objective/train/original_loss": 4.959630966186523, + "objective/train/theoretical_loss": 4.194795746858309, + "objective/train/tokens_used": 292434400, + "objective/train/value_avg": -0.0167083740234375, + "objective/train/value_loss": 0.0026441654190421104, + "objective/train/value_max": -0.0030879974365234375, + "objective/train/value_min": -0.1611328125, + "objective/train/value_reward_corr": 0.14141354881518448, + "objective/train/value_std": 0.010223388671875, + "objective/train/weight_avg": 1.0004303455352783, + "objective/train/weighted_lm_loss": 4.961060523986816, + "objective/train/weights_max": 1.015216588973999, + "objective/train/weights_min": 0.940798282623291, + "theoretical_loss": 4.194795746858309, + "tokens_seen": 271974400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009268057784911718, + "loss": 2.5572, + "theoretical_loss": 4.193592657892869, + "tokens_seen": 272629760 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.014245759695768356, + "objective/train/docs_used": 166474, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.357468605041504, + "objective/train/original_loss": 5.3574676513671875, + "objective/train/theoretical_loss": 4.191794948374304, + "objective/train/tokens_used": 294072800, + "objective/train/value_avg": -0.0219573974609375, + "objective/train/value_loss": 0.0016564101679250598, + "objective/train/value_max": -0.003795623779296875, + "objective/train/value_min": -0.2362060546875, + "objective/train/value_reward_corr": 0.0645042702554181, + "objective/train/value_std": 0.0146942138671875, + "objective/train/weight_avg": 1.0014328956604004, + "objective/train/weighted_lm_loss": 5.365691661834717, + "objective/train/weights_max": 1.023294448852539, + "objective/train/weights_min": 0.9399189949035645, + "theoretical_loss": 4.191794948374304, + "tokens_seen": 273612800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009264847512038524, + "loss": 2.5345, + "theoretical_loss": 4.191675395095324, + "tokens_seen": 273678336 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009261637239165329, + "loss": 2.5605, + "theoretical_loss": 4.189767511984741, + "tokens_seen": 274726912 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.006437433883547783, + "objective/train/docs_used": 167916, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.64432430267334, + "objective/train/original_loss": 4.64432430267334, + "objective/train/theoretical_loss": 4.188817062326644, + "objective/train/tokens_used": 295711200, + "objective/train/value_avg": -0.0191497802734375, + "objective/train/value_loss": 0.00549793615937233, + "objective/train/value_max": -0.003429412841796875, + "objective/train/value_min": -0.1749267578125, + "objective/train/value_reward_corr": 0.03401703687500345, + "objective/train/value_std": 0.0098114013671875, + "objective/train/weight_avg": 1.0006704330444336, + "objective/train/weighted_lm_loss": 4.648632049560547, + "objective/train/weights_max": 1.0175901651382446, + "objective/train/weights_min": 0.9074461460113525, + "theoretical_loss": 4.188817062326644, + "tokens_seen": 275251200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009258426966292135, + "loss": 2.576, + "theoretical_loss": 4.187868927135035, + "tokens_seen": 275775488 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009255216693418941, + "loss": 2.5685, + "theoretical_loss": 4.185979560133161, + "tokens_seen": 276824064 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.008432134985923767, + "objective/train/docs_used": 168545, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.3711419105529785, + "objective/train/original_loss": 5.371142387390137, + "objective/train/theoretical_loss": 4.185861778938769, + "objective/train/tokens_used": 297349600, + "objective/train/value_avg": -0.024078369140625, + "objective/train/value_loss": 0.003521246137097478, + "objective/train/value_max": -0.0032100677490234375, + "objective/train/value_min": -0.2171630859375, + "objective/train/value_reward_corr": 0.09231719374598145, + "objective/train/value_std": 0.01806640625, + "objective/train/weight_avg": 1.000860571861267, + "objective/train/weighted_lm_loss": 5.376904487609863, + "objective/train/weights_max": 1.0218662023544312, + "objective/train/weights_min": 0.9314491152763367, + "theoretical_loss": 4.185861778938769, + "tokens_seen": 276889600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009252006420545747, + "loss": 2.578, + "theoretical_loss": 4.184099331562732, + "tokens_seen": 277872640 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.005253546871244907, + "objective/train/docs_used": 169743, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.213504314422607, + "objective/train/original_loss": 5.213504314422607, + "objective/train/theoretical_loss": 4.182928794423724, + "objective/train/tokens_used": 298988000, + "objective/train/value_avg": -0.0207672119140625, + "objective/train/value_loss": 0.0048182387836277485, + "objective/train/value_max": -0.0035800933837890625, + "objective/train/value_min": -0.167236328125, + "objective/train/value_reward_corr": 0.13306955038160212, + "objective/train/value_std": 0.01367950439453125, + "objective/train/weight_avg": 1.0005489587783813, + "objective/train/weighted_lm_loss": 5.216400623321533, + "objective/train/weights_max": 1.0129115581512451, + "objective/train/weights_min": 0.9082536697387695, + "theoretical_loss": 4.182928794423724, + "tokens_seen": 278528000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009248796147672552, + "loss": 2.5393, + "theoretical_loss": 4.182228162987963, + "tokens_seen": 278921216 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009245585874799358, + "loss": 2.5625, + "theoretical_loss": 4.18036597693793, + "tokens_seen": 279969792 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0029540802352130413, + "objective/train/docs_used": 170343, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.72364616394043, + "objective/train/original_loss": 5.723646640777588, + "objective/train/theoretical_loss": 4.180017810833923, + "objective/train/tokens_used": 300626400, + "objective/train/value_avg": -0.0206146240234375, + "objective/train/value_loss": 0.0057000769302248955, + "objective/train/value_max": -0.0030155181884765625, + "objective/train/value_min": -0.1912841796875, + "objective/train/value_reward_corr": 0.003182208046563909, + "objective/train/value_std": 0.01416015625, + "objective/train/weight_avg": 1.0003234148025513, + "objective/train/weighted_lm_loss": 5.72607946395874, + "objective/train/weights_max": 1.0132806301116943, + "objective/train/weights_min": 0.9085688591003418, + "theoretical_loss": 4.180017810833923, + "tokens_seen": 280166400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009242375601926164, + "loss": 2.5354, + "theoretical_loss": 4.178512696891136, + "tokens_seen": 281018368 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.00691023375838995, + "objective/train/docs_used": 171008, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.033203125, + "objective/train/original_loss": 5.033203125, + "objective/train/theoretical_loss": 4.177128535915539, + "objective/train/tokens_used": 302264800, + "objective/train/value_avg": -0.0181884765625, + "objective/train/value_loss": 0.0017826121766120195, + "objective/train/value_max": -0.0031108856201171875, + "objective/train/value_min": -0.11376953125, + "objective/train/value_reward_corr": -0.018131070181399027, + "objective/train/value_std": 0.01020050048828125, + "objective/train/weight_avg": 1.0006998777389526, + "objective/train/weighted_lm_loss": 5.037181377410889, + "objective/train/weights_max": 1.0113754272460938, + "objective/train/weights_min": 0.9498676061630249, + "theoretical_loss": 4.177128535915539, + "tokens_seen": 281804800 + }, + { + "epoch": 0.09, + "learning_rate": 0.000923916532905297, + "loss": 2.5167, + "theoretical_loss": 4.176668247260391, + "tokens_seen": 282066944 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009235955056179775, + "loss": 2.5394, + "theoretical_loss": 4.174832553377978, + "tokens_seen": 283115520 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0052643162198364735, + "objective/train/docs_used": 172282, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.446418762207031, + "objective/train/original_loss": 4.446417808532715, + "objective/train/theoretical_loss": 4.174260682967347, + "objective/train/tokens_used": 303903200, + "objective/train/value_avg": -0.0200042724609375, + "objective/train/value_loss": 0.003910416271537542, + "objective/train/value_max": -0.0038547515869140625, + "objective/train/value_min": -0.1734619140625, + "objective/train/value_reward_corr": 0.0347586992857599, + "objective/train/value_std": 0.010345458984375, + "objective/train/weight_avg": 1.0005455017089844, + "objective/train/weighted_lm_loss": 4.449129581451416, + "objective/train/weights_max": 1.0164482593536377, + "objective/train/weights_min": 0.9123042225837708, + "theoretical_loss": 4.174260682967347, + "tokens_seen": 283443200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009232744783306581, + "loss": 2.5399, + "theoretical_loss": 4.173005541481111, + "tokens_seen": 284164096 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.010877039283514023, + "objective/train/docs_used": 172985, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.503680229187012, + "objective/train/original_loss": 5.5036797523498535, + "objective/train/theoretical_loss": 4.171413970703851, + "objective/train/tokens_used": 305541600, + "objective/train/value_avg": -0.021484375, + "objective/train/value_loss": 0.00532505102455616, + "objective/train/value_max": -0.00395965576171875, + "objective/train/value_min": -0.1883544921875, + "objective/train/value_reward_corr": 0.07668736451132205, + "objective/train/value_std": 0.01197052001953125, + "objective/train/weight_avg": 1.001113772392273, + "objective/train/weighted_lm_loss": 5.510350227355957, + "objective/train/weights_max": 1.018951177597046, + "objective/train/weights_min": 0.9104436039924622, + "theoretical_loss": 4.171413970703851, + "tokens_seen": 285081600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009229534510433388, + "loss": 2.5147, + "theoretical_loss": 4.1711871386976815, + "tokens_seen": 285212672 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009226324237560193, + "loss": 2.5524, + "theoretical_loss": 4.16937727303227, + "tokens_seen": 286261248 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.008147476240992546, + "objective/train/docs_used": 174327, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.199957370758057, + "objective/train/original_loss": 5.199957370758057, + "objective/train/theoretical_loss": 4.168588123122568, + "objective/train/tokens_used": 307180000, + "objective/train/value_avg": -0.019744873046875, + "objective/train/value_loss": 0.0026077248621731997, + "objective/train/value_max": -0.0035648345947265625, + "objective/train/value_min": -0.1453857421875, + "objective/train/value_reward_corr": 0.18919910641640827, + "objective/train/value_std": 0.0122222900390625, + "objective/train/weight_avg": 1.000827670097351, + "objective/train/weighted_lm_loss": 5.204638957977295, + "objective/train/weights_max": 1.0145708322525024, + "objective/train/weights_min": 0.9613944888114929, + "theoretical_loss": 4.168588123122568, + "tokens_seen": 286720000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009223113964686999, + "loss": 2.5393, + "theoretical_loss": 4.167575873352437, + "tokens_seen": 287309824 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.007263224106281996, + "objective/train/docs_used": 174865, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.122271537780762, + "objective/train/original_loss": 5.12227201461792, + "objective/train/theoretical_loss": 4.165782869375278, + "objective/train/tokens_used": 308818400, + "objective/train/value_avg": -0.0189056396484375, + "objective/train/value_loss": 0.004694041796028614, + "objective/train/value_max": -0.0033893585205078125, + "objective/train/value_min": -0.10125732421875, + "objective/train/value_reward_corr": 0.09397874782572525, + "objective/train/value_std": 0.009735107421875, + "objective/train/weight_avg": 1.0007492303848267, + "objective/train/weighted_lm_loss": 5.126623630523682, + "objective/train/weights_max": 1.010093331336975, + "objective/train/weights_min": 0.907362699508667, + "theoretical_loss": 4.165782869375278, + "tokens_seen": 288358400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009219903691813805, + "loss": 2.5435, + "theoretical_loss": 4.165782869375278, + "tokens_seen": 288358400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009216693418940611, + "loss": 2.5074, + "theoretical_loss": 4.163998191654223, + "tokens_seen": 289406976 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.015484405681490898, + "objective/train/docs_used": 176137, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.642522811889648, + "objective/train/original_loss": 4.642522811889648, + "objective/train/theoretical_loss": 4.162997943643143, + "objective/train/tokens_used": 310456800, + "objective/train/value_avg": -0.019622802734375, + "objective/train/value_loss": 0.0009587525273673236, + "objective/train/value_max": -0.0028896331787109375, + "objective/train/value_min": -0.18505859375, + "objective/train/value_reward_corr": 0.034575009031872356, + "objective/train/value_std": 0.01213836669921875, + "objective/train/weight_avg": 1.0015532970428467, + "objective/train/weighted_lm_loss": 4.65002965927124, + "objective/train/weights_max": 1.0186092853546143, + "objective/train/weights_min": 0.9569457769393921, + "theoretical_loss": 4.162997943643143, + "tokens_seen": 289996800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009213483146067416, + "loss": 2.5126, + "theoretical_loss": 4.162221771566105, + "tokens_seen": 290455552 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009210272873194222, + "loss": 2.5381, + "theoretical_loss": 4.160453541298465, + "tokens_seen": 291504128 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": -0.011748590506613255, + "objective/train/docs_used": 176478, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.314417362213135, + "objective/train/original_loss": 5.314418315887451, + "objective/train/theoretical_loss": 4.160233085015529, + "objective/train/tokens_used": 312095200, + "objective/train/value_avg": -0.020263671875, + "objective/train/value_loss": 0.008734009228646755, + "objective/train/value_max": -0.00327301025390625, + "objective/train/value_min": -0.18115234375, + "objective/train/value_reward_corr": -0.1142133627681505, + "objective/train/value_std": 0.012847900390625, + "objective/train/weight_avg": 0.9988680481910706, + "objective/train/weighted_lm_loss": 5.310923099517822, + "objective/train/weights_max": 1.0182198286056519, + "objective/train/weights_min": 0.9079734683036804, + "theoretical_loss": 4.160233085015529, + "tokens_seen": 291635200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009207062600321028, + "loss": 2.5411, + "theoretical_loss": 4.158693433837098, + "tokens_seen": 292552704 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": -0.013696713373064995, + "objective/train/docs_used": 177825, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.327238082885742, + "objective/train/original_loss": 5.3272385597229, + "objective/train/theoretical_loss": 4.157488037372401, + "objective/train/tokens_used": 313733600, + "objective/train/value_avg": -0.0184173583984375, + "objective/train/value_loss": 0.014264113269746304, + "objective/train/value_max": -0.0033626556396484375, + "objective/train/value_min": -0.194091796875, + "objective/train/value_reward_corr": 0.04131990116611166, + "objective/train/value_std": 0.01000213623046875, + "objective/train/weight_avg": 0.9987002015113831, + "objective/train/weighted_lm_loss": 5.324479579925537, + "objective/train/weights_max": 1.0195345878601074, + "objective/train/weights_min": 0.9093658328056335, + "theoretical_loss": 4.157488037372401, + "tokens_seen": 293273600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009203852327447833, + "loss": 2.5286, + "theoretical_loss": 4.156941382953835, + "tokens_seen": 293601280 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009200642054574639, + "loss": 2.5376, + "theoretical_loss": 4.155197323194555, + "tokens_seen": 294649856 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.013131109066307545, + "objective/train/docs_used": 179027, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.271512508392334, + "objective/train/original_loss": 4.271512985229492, + "objective/train/theoretical_loss": 4.154762549270199, + "objective/train/tokens_used": 315372000, + "objective/train/value_avg": -0.018768310546875, + "objective/train/value_loss": 0.00086716323858127, + "objective/train/value_max": -0.0034160614013671875, + "objective/train/value_min": -0.14013671875, + "objective/train/value_reward_corr": 0.02091407531341945, + "objective/train/value_std": 0.00946044921875, + "objective/train/weight_avg": 1.0013173818588257, + "objective/train/weighted_lm_loss": 4.277379035949707, + "objective/train/weights_max": 1.014054298400879, + "objective/train/weights_min": 0.9412168860435486, + "theoretical_loss": 4.154762549270199, + "tokens_seen": 294912000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009197431781701445, + "loss": 2.53, + "theoretical_loss": 4.153461189867425, + "tokens_seen": 295698432 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.003569399705156684, + "objective/train/docs_used": 179648, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.208392143249512, + "objective/train/original_loss": 5.208392143249512, + "objective/train/theoretical_loss": 4.15205637383102, + "objective/train/tokens_used": 317010400, + "objective/train/value_avg": -0.0201873779296875, + "objective/train/value_loss": 0.004714330658316612, + "objective/train/value_max": -0.0032978057861328125, + "objective/train/value_min": -0.2186279296875, + "objective/train/value_reward_corr": 0.06149863178838112, + "objective/train/value_std": 0.01486968994140625, + "objective/train/weight_avg": 1.000380039215088, + "objective/train/weighted_lm_loss": 5.211032867431641, + "objective/train/weights_max": 1.0217825174331665, + "objective/train/weights_min": 0.9150452017784119, + "theoretical_loss": 4.15205637383102, + "tokens_seen": 296550400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009194221508828251, + "loss": 2.526, + "theoretical_loss": 4.151732919031354, + "tokens_seen": 296747008 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009191011235955057, + "loss": 2.526, + "theoretical_loss": 4.150012447484665, + "tokens_seen": 297795584 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": -0.0011530915508046746, + "objective/train/docs_used": 180307, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.065594673156738, + "objective/train/original_loss": 5.065594673156738, + "objective/train/theoretical_loss": 4.149369268635046, + "objective/train/tokens_used": 318648800, + "objective/train/value_avg": -0.0200347900390625, + "objective/train/value_loss": 0.006705759093165398, + "objective/train/value_max": -0.003482818603515625, + "objective/train/value_min": -0.172607421875, + "objective/train/value_reward_corr": 0.2567848735012131, + "objective/train/value_std": 0.01194000244140625, + "objective/train/weight_avg": 0.99991774559021, + "objective/train/weighted_lm_loss": 5.063695907592773, + "objective/train/weights_max": 1.017327904701233, + "objective/train/weights_min": 0.9074713587760925, + "theoretical_loss": 4.149369268635046, + "tokens_seen": 298188800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009187800963081863, + "loss": 2.4879, + "theoretical_loss": 4.148299712753977, + "tokens_seen": 298844160 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.013650062493979931, + "objective/train/docs_used": 181680, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.410926818847656, + "objective/train/original_loss": 5.410926818847656, + "objective/train/theoretical_loss": 4.146700995616065, + "objective/train/tokens_used": 320287200, + "objective/train/value_avg": -0.0211639404296875, + "objective/train/value_loss": 0.0017911380855366588, + "objective/train/value_max": -0.004215240478515625, + "objective/train/value_min": -0.1365966796875, + "objective/train/value_reward_corr": 0.07743523711295246, + "objective/train/value_std": 0.01407623291015625, + "objective/train/weight_avg": 1.0013738870620728, + "objective/train/weighted_lm_loss": 5.418900489807129, + "objective/train/weights_max": 1.0119318962097168, + "objective/train/weights_min": 0.9224836230278015, + "theoretical_loss": 4.146700995616065, + "tokens_seen": 299827200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009184590690208669, + "loss": 2.4852, + "theoretical_loss": 4.146594653083293, + "tokens_seen": 299892736 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009181380417335474, + "loss": 2.4785, + "theoretical_loss": 4.144897207423284, + "tokens_seen": 300941312 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.014913391321897507, + "objective/train/docs_used": 182385, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.149347305297852, + "objective/train/original_loss": 5.149348258972168, + "objective/train/theoretical_loss": 4.144051320960009, + "objective/train/tokens_used": 321925600, + "objective/train/value_avg": -0.0192718505859375, + "objective/train/value_loss": 0.0009770480683073401, + "objective/train/value_max": -0.0037364959716796875, + "objective/train/value_min": -0.1495361328125, + "objective/train/value_reward_corr": 0.007012459265939749, + "objective/train/value_std": 0.01241302490234375, + "objective/train/weight_avg": 1.0014961957931519, + "objective/train/weighted_lm_loss": 5.157305717468262, + "objective/train/weights_max": 1.0149961709976196, + "objective/train/weights_min": 0.964177668094635, + "theoretical_loss": 4.144051320960009, + "tokens_seen": 301465600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009178170144462279, + "loss": 2.5222, + "theoretical_loss": 4.143207315420783, + "tokens_seen": 301989888 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009174959871589085, + "loss": 2.4997, + "theoretical_loss": 4.141524917408454, + "tokens_seen": 303038464 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.010648947209119797, + "objective/train/docs_used": 183646, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.338652610778809, + "objective/train/original_loss": 5.33865213394165, + "objective/train/theoretical_loss": 4.141420015006382, + "objective/train/tokens_used": 323564000, + "objective/train/value_avg": -0.0159912109375, + "objective/train/value_loss": 0.0014841792872175574, + "objective/train/value_max": -0.0033626556396484375, + "objective/train/value_min": -0.1104736328125, + "objective/train/value_reward_corr": 0.052937609207155095, + "objective/train/value_std": 0.00872802734375, + "objective/train/weight_avg": 1.0010721683502197, + "objective/train/weighted_lm_loss": 5.344601631164551, + "objective/train/weights_max": 1.0109339952468872, + "objective/train/weights_min": 0.9269888401031494, + "theoretical_loss": 4.141420015006382, + "tokens_seen": 303104000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009171749598715891, + "loss": 2.5033, + "theoretical_loss": 4.1398499543946565, + "tokens_seen": 304087040 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": -0.038973111659288406, + "objective/train/docs_used": 184075, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.032235145568848, + "objective/train/original_loss": 5.032235145568848, + "objective/train/theoretical_loss": 4.138806852152502, + "objective/train/tokens_used": 325202400, + "objective/train/value_avg": -0.0234832763671875, + "objective/train/value_loss": 0.0381106436252594, + "objective/train/value_max": -0.004024505615234375, + "objective/train/value_min": -0.1763916015625, + "objective/train/value_reward_corr": -0.022799861496740896, + "objective/train/value_std": 0.01401519775390625, + "objective/train/weight_avg": 0.9962885975837708, + "objective/train/weighted_lm_loss": 5.019482612609863, + "objective/train/weights_max": 1.0177325010299683, + "objective/train/weights_min": 0.906742513179779, + "theoretical_loss": 4.138806852152502, + "tokens_seen": 304742400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009168539325842696, + "loss": 2.5566, + "theoretical_loss": 4.138182368053505, + "tokens_seen": 305135616 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009165329052969502, + "loss": 2.5405, + "theoretical_loss": 4.136522100715087, + "tokens_seen": 306184192 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": -0.021592741832137108, + "objective/train/docs_used": 185377, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.535671234130859, + "objective/train/original_loss": 5.535671234130859, + "objective/train/theoretical_loss": 4.136211610760441, + "objective/train/tokens_used": 326840800, + "objective/train/value_avg": -0.0205078125, + "objective/train/value_loss": 0.016459917649626732, + "objective/train/value_max": -0.0036067962646484375, + "objective/train/value_min": -0.134765625, + "objective/train/value_reward_corr": 0.04429739848430024, + "objective/train/value_std": 0.01132965087890625, + "objective/train/weight_avg": 0.9979214668273926, + "objective/train/weighted_lm_loss": 5.528247356414795, + "objective/train/weights_max": 1.0135046243667603, + "objective/train/weights_min": 0.9096146821975708, + "theoretical_loss": 4.136211610760441, + "tokens_seen": 306380800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009162118780096308, + "loss": 2.5364, + "theoretical_loss": 4.134869095355876, + "tokens_seen": 307232768 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.009094956330955029, + "objective/train/docs_used": 186163, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.671026229858398, + "objective/train/original_loss": 4.67102575302124, + "objective/train/theoretical_loss": 4.133634073066595, + "objective/train/tokens_used": 328479200, + "objective/train/value_avg": -0.016326904296875, + "objective/train/value_loss": 0.002849227748811245, + "objective/train/value_max": -0.0032863616943359375, + "objective/train/value_min": -0.118408203125, + "objective/train/value_reward_corr": 0.0234195514671722, + "objective/train/value_std": 0.0081329345703125, + "objective/train/weight_avg": 1.0009233951568604, + "objective/train/weighted_lm_loss": 4.675345420837402, + "objective/train/weights_max": 1.011823058128357, + "objective/train/weights_min": 0.9139544367790222, + "theoretical_loss": 4.133634073066595, + "tokens_seen": 308019200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009158908507223113, + "loss": 2.5202, + "theoretical_loss": 4.1332232955893105, + "tokens_seen": 308281344 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009155698234349919, + "loss": 2.545, + "theoretical_loss": 4.131584645656535, + "tokens_seen": 309329920 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.00550789525732398, + "objective/train/docs_used": 187126, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.005856513977051, + "objective/train/original_loss": 5.005856513977051, + "objective/train/theoretical_loss": 4.131074025093778, + "objective/train/tokens_used": 330117600, + "objective/train/value_avg": -0.0194244384765625, + "objective/train/value_loss": 0.003192130709066987, + "objective/train/value_max": -0.0036220550537109375, + "objective/train/value_min": -0.1510009765625, + "objective/train/value_reward_corr": -0.006317558783211708, + "objective/train/value_std": 0.0108184814453125, + "objective/train/weight_avg": 1.0005666017532349, + "objective/train/weighted_lm_loss": 5.0102925300598145, + "objective/train/weights_max": 1.0142545700073242, + "objective/train/weights_min": 0.9328802824020386, + "theoretical_loss": 4.131074025093778, + "tokens_seen": 309657600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009152487961476725, + "loss": 2.5287, + "theoretical_loss": 4.129953090417319, + "tokens_seen": 310378496 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.00516870291903615, + "objective/train/docs_used": 187875, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.5035786628723145, + "objective/train/original_loss": 5.503580093383789, + "objective/train/theoretical_loss": 4.128531256565763, + "objective/train/tokens_used": 331756000, + "objective/train/value_avg": -0.021575927734375, + "objective/train/value_loss": 0.004168914631009102, + "objective/train/value_max": -0.003551483154296875, + "objective/train/value_min": -0.2017822265625, + "objective/train/value_reward_corr": 0.27069365923114347, + "objective/train/value_std": 0.0173492431640625, + "objective/train/weight_avg": 1.000537395477295, + "objective/train/weighted_lm_loss": 5.505998611450195, + "objective/train/weights_max": 1.0203304290771484, + "objective/train/weights_min": 0.9269610047340393, + "theoretical_loss": 4.128531256565763, + "tokens_seen": 311296000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009149277688603532, + "loss": 2.4937, + "theoretical_loss": 4.128328575341129, + "tokens_seen": 311427072 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009146067415730337, + "loss": 2.5095, + "theoretical_loss": 4.12671104649836, + "tokens_seen": 312475648 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": -0.009911863133311272, + "objective/train/docs_used": 188927, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.123474597930908, + "objective/train/original_loss": 5.12347412109375, + "objective/train/theoretical_loss": 4.126005560824196, + "objective/train/tokens_used": 333394400, + "objective/train/value_avg": -0.019775390625, + "objective/train/value_loss": 0.012699831277132034, + "objective/train/value_max": -0.0038394927978515625, + "objective/train/value_min": -0.28369140625, + "objective/train/value_reward_corr": 0.052418691553121476, + "objective/train/value_std": 0.011322021484375, + "objective/train/weight_avg": 0.9990707039833069, + "objective/train/weighted_lm_loss": 5.124359607696533, + "objective/train/weights_max": 1.0287114381790161, + "objective/train/weights_min": 0.9068987965583801, + "theoretical_loss": 4.126005560824196, + "tokens_seen": 312934400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009142857142857143, + "loss": 2.5239, + "theoretical_loss": 4.125100450551725, + "tokens_seen": 313524224 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": -0.0025120112113654613, + "objective/train/docs_used": 189547, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.7408576011657715, + "objective/train/original_loss": 4.74085807800293, + "objective/train/theoretical_loss": 4.123496734747793, + "objective/train/tokens_used": 335032800, + "objective/train/value_avg": -0.0196685791015625, + "objective/train/value_loss": 0.00565487751737237, + "objective/train/value_max": -0.0034694671630859375, + "objective/train/value_min": -0.1368408203125, + "objective/train/value_reward_corr": 0.2987129832414899, + "objective/train/value_std": 0.0128173828125, + "objective/train/weight_avg": 0.9997766017913818, + "objective/train/weighted_lm_loss": 4.7397613525390625, + "objective/train/weights_max": 1.01371431350708, + "objective/train/weights_min": 0.9074086546897888, + "theoretical_loss": 4.123496734747793, + "tokens_seen": 314572800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009139646869983949, + "loss": 2.55, + "theoretical_loss": 4.123496734747793, + "tokens_seen": 314572800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009136436597110755, + "loss": 2.5477, + "theoretical_loss": 4.121899846908677, + "tokens_seen": 315621376 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.009905599988996983, + "objective/train/docs_used": 190733, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.033602714538574, + "objective/train/original_loss": 5.033603191375732, + "objective/train/theoretical_loss": 4.121004578673752, + "objective/train/tokens_used": 336671200, + "objective/train/value_avg": -0.01885986328125, + "objective/train/value_loss": 0.0013285496970638633, + "objective/train/value_max": -0.0033626556396484375, + "objective/train/value_min": -0.1470947265625, + "objective/train/value_reward_corr": -0.00968288572334438, + "objective/train/value_std": 0.010406494140625, + "objective/train/weight_avg": 1.0009971857070923, + "objective/train/weighted_lm_loss": 5.039534568786621, + "objective/train/weights_max": 1.0147455930709839, + "objective/train/weights_min": 0.939893901348114, + "theoretical_loss": 4.121004578673752, + "tokens_seen": 316211200 + }, + { + "epoch": 0.1, + "learning_rate": 0.000913322632423756, + "loss": 2.5146, + "theoretical_loss": 4.120309735423871, + "tokens_seen": 316669952 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009130016051364366, + "loss": 2.4848, + "theoretical_loss": 4.118726349242221, + "tokens_seen": 317718528 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0005289696273393929, + "objective/train/docs_used": 191330, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.3464179039001465, + "objective/train/original_loss": 5.346417427062988, + "objective/train/theoretical_loss": 4.118528896321316, + "objective/train/tokens_used": 338309600, + "objective/train/value_avg": -0.020904541015625, + "objective/train/value_loss": 0.010447087697684765, + "objective/train/value_max": -0.0037212371826171875, + "objective/train/value_min": -0.144775390625, + "objective/train/value_reward_corr": 0.028231875033881252, + "objective/train/value_std": 0.0115814208984375, + "objective/train/weight_avg": 1.0001038312911987, + "objective/train/weighted_lm_loss": 5.345874786376953, + "objective/train/weights_max": 1.0140599012374878, + "objective/train/weights_min": 0.9063549041748047, + "theoretical_loss": 4.118528896321316, + "tokens_seen": 317849600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009126805778491172, + "loss": 2.5166, + "theoretical_loss": 4.117149637864041, + "tokens_seen": 318767104 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": -0.020221475511789322, + "objective/train/docs_used": 192764, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.753813743591309, + "objective/train/original_loss": 4.753813743591309, + "objective/train/theoretical_loss": 4.116069494717399, + "objective/train/tokens_used": 339948000, + "objective/train/value_avg": -0.017822265625, + "objective/train/value_loss": 0.03044849820435047, + "objective/train/value_max": -0.0036792755126953125, + "objective/train/value_min": -0.28369140625, + "objective/train/value_reward_corr": -0.03286722821601188, + "objective/train/value_std": 0.0106048583984375, + "objective/train/weight_avg": 0.9981256127357483, + "objective/train/weighted_lm_loss": 4.744387149810791, + "objective/train/weights_max": 1.0287185907363892, + "objective/train/weights_min": 0.9110072255134583, + "theoretical_loss": 4.116069494717399, + "tokens_seen": 319488000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009123595505617977, + "loss": 2.463, + "theoretical_loss": 4.115579551333372, + "tokens_seen": 319815680 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009120385232744783, + "loss": 2.4632, + "theoretical_loss": 4.114016040230357, + "tokens_seen": 320864256 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.01228283066302538, + "objective/train/docs_used": 193449, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.304341793060303, + "objective/train/original_loss": 5.304340839385986, + "objective/train/theoretical_loss": 4.113626184124224, + "objective/train/tokens_used": 341586400, + "objective/train/value_avg": -0.02056884765625, + "objective/train/value_loss": 0.004052348434925079, + "objective/train/value_max": -0.0029010772705078125, + "objective/train/value_min": -0.1806640625, + "objective/train/value_reward_corr": 0.021892302170739758, + "objective/train/value_std": 0.01265716552734375, + "objective/train/weight_avg": 1.0012480020523071, + "objective/train/weighted_lm_loss": 5.311839580535889, + "objective/train/weights_max": 1.0178310871124268, + "objective/train/weights_min": 0.9084504842758179, + "theoretical_loss": 4.113626184124224, + "tokens_seen": 321126400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009117174959871589, + "loss": 2.5003, + "theoretical_loss": 4.112459055663768, + "tokens_seen": 321912832 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0023210730869323015, + "objective/train/docs_used": 194727, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.274553298950195, + "objective/train/original_loss": 5.274552822113037, + "objective/train/theoretical_loss": 4.111198777968905, + "objective/train/tokens_used": 343224800, + "objective/train/value_avg": -0.0235748291015625, + "objective/train/value_loss": 0.007437140680849552, + "objective/train/value_max": -0.003124237060546875, + "objective/train/value_min": -0.1435546875, + "objective/train/value_reward_corr": 0.2508313806843079, + "objective/train/value_std": 0.0153350830078125, + "objective/train/weight_avg": 1.000268578529358, + "objective/train/weighted_lm_loss": 5.276092529296875, + "objective/train/weights_max": 1.0143625736236572, + "objective/train/weights_min": 0.9108084440231323, + "theoretical_loss": 4.111198777968905, + "tokens_seen": 322764800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009113964686998395, + "loss": 2.5047, + "theoretical_loss": 4.110908549263647, + "tokens_seen": 322961408 + }, + { + "epoch": 0.1, + "learning_rate": 0.00091107544141252, + "loss": 2.5076, + "theoretical_loss": 4.109364473174075, + "tokens_seen": 324009984 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.01160583645105362, + "objective/train/docs_used": 195463, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.584168910980225, + "objective/train/original_loss": 4.584168910980225, + "objective/train/theoretical_loss": 4.108787092774909, + "objective/train/tokens_used": 344863200, + "objective/train/value_avg": -0.0223541259765625, + "objective/train/value_loss": 0.00505579961463809, + "objective/train/value_max": -0.0036792755126953125, + "objective/train/value_min": -0.11676025390625, + "objective/train/value_reward_corr": 0.04673277844593481, + "objective/train/value_std": 0.01165771484375, + "objective/train/weight_avg": 1.0011852979660034, + "objective/train/weighted_lm_loss": 4.589744567871094, + "objective/train/weights_max": 1.0111967325210571, + "objective/train/weights_min": 0.9082949161529541, + "theoretical_loss": 4.108787092774909, + "tokens_seen": 324403200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009107544141252007, + "loss": 2.4689, + "theoretical_loss": 4.107826780046074, + "tokens_seen": 325058560 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.01006318163126707, + "objective/train/docs_used": 196731, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.750372409820557, + "objective/train/original_loss": 4.750372409820557, + "objective/train/theoretical_loss": 4.106390948095338, + "objective/train/tokens_used": 346501600, + "objective/train/value_avg": -0.01904296875, + "objective/train/value_loss": 0.0010882962960749865, + "objective/train/value_max": -0.004085540771484375, + "objective/train/value_min": -0.1478271484375, + "objective/train/value_reward_corr": 0.07931476636145887, + "objective/train/value_std": 0.01058197021484375, + "objective/train/weight_avg": 1.001011610031128, + "objective/train/weighted_lm_loss": 4.756397247314453, + "objective/train/weights_max": 1.0113720893859863, + "objective/train/weights_min": 0.9384897351264954, + "theoretical_loss": 4.106390948095338, + "tokens_seen": 326041600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009104333868378813, + "loss": 2.4944, + "theoretical_loss": 4.106295423030614, + "tokens_seen": 326107136 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009101123595505618, + "loss": 2.4475, + "theoretical_loss": 4.104770355771754, + "tokens_seen": 327155712 + }, + { + "debugging/Self-BLEU-5": 0.3841532625917073, + "debugging/distinct-1-grams": 0.7832216589580511, + "debugging/distinct-2-grams": 0.9618292669346826, + "debugging/entropy-1-grams": 5.637375639920272, + "debugging/entropy-2-grams": 6.413549369763027, + "debugging/length": 486.6666666666667, + "debugging/num_segments": 9, + "debugging/raw_token_scores_avg": 0.0037555587477982044, + "debugging/raw_token_scores_std": 0.02093208208680153, + "epoch": 0.1, + "objective/train/advantage_avg": 0.015462547540664673, + "objective/train/docs_used": 197513, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.804978847503662, + "objective/train/original_loss": 4.804978370666504, + "objective/train/theoretical_loss": 4.10401016644798, + "objective/train/tokens_used": 348140000, + "objective/train/value_avg": -0.019195556640625, + "objective/train/value_loss": 0.0007741200970485806, + "objective/train/value_max": -0.003650665283203125, + "objective/train/value_min": -0.177001953125, + "objective/train/value_reward_corr": 0.03913239440382386, + "objective/train/value_std": 0.0108642578125, + "objective/train/weight_avg": 1.0015500783920288, + "objective/train/weighted_lm_loss": 4.812781810760498, + "objective/train/weights_max": 1.017770528793335, + "objective/train/weights_min": 0.9407235980033875, + "theoretical_loss": 4.10401016644798, + "tokens_seen": 327680000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009097913322632424, + "loss": 2.4787, + "theoretical_loss": 4.103251532399884, + "tokens_seen": 328204288 + }, + { + "epoch": 0.1, + "learning_rate": 0.000909470304975923, + "loss": 2.4752, + "theoretical_loss": 4.101738907525098, + "tokens_seen": 329252864 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0024382087867707014, + "objective/train/docs_used": 198138, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.224846363067627, + "objective/train/original_loss": 5.224845886230469, + "objective/train/theoretical_loss": 4.101644573252066, + "objective/train/tokens_used": 349778400, + "objective/train/value_avg": -0.02044677734375, + "objective/train/value_loss": 0.005316479597240686, + "objective/train/value_max": -0.0035381317138671875, + "objective/train/value_min": -0.1318359375, + "objective/train/value_reward_corr": 0.35232303974301077, + "objective/train/value_std": 0.0159454345703125, + "objective/train/weight_avg": 1.0002700090408325, + "objective/train/weighted_lm_loss": 5.225821018218994, + "objective/train/weights_max": 1.0108346939086914, + "objective/train/weights_min": 0.9200348258018494, + "theoretical_loss": 4.101644573252066, + "tokens_seen": 329318400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009091492776886036, + "loss": 2.4788, + "theoretical_loss": 4.100232436230659, + "tokens_seen": 330301440 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0001736995909595862, + "objective/train/docs_used": 199416, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.892314434051514, + "objective/train/original_loss": 4.892314434051514, + "objective/train/theoretical_loss": 4.099293996766681, + "objective/train/tokens_used": 351416800, + "objective/train/value_avg": -0.017120361328125, + "objective/train/value_loss": 0.003828026819974184, + "objective/train/value_max": -0.0034427642822265625, + "objective/train/value_min": -0.1307373046875, + "objective/train/value_reward_corr": -0.014919388497025373, + "objective/train/value_std": 0.01116943359375, + "objective/train/weight_avg": 1.000036358833313, + "objective/train/weighted_lm_loss": 4.89425802230835, + "objective/train/weights_max": 1.0130935907363892, + "objective/train/weights_min": 0.9450024366378784, + "theoretical_loss": 4.099293996766681, + "tokens_seen": 330956800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009088282504012841, + "loss": 2.4412, + "theoretical_loss": 4.098732074066591, + "tokens_seen": 331350016 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009085072231139647, + "loss": 2.4397, + "theoretical_loss": 4.097237777043363, + "tokens_seen": 332398592 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.012096314691007137, + "objective/train/docs_used": 200524, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.370635509490967, + "objective/train/original_loss": 5.370635509490967, + "objective/train/theoretical_loss": 4.096958268030786, + "objective/train/tokens_used": 353055200, + "objective/train/value_avg": -0.0200042724609375, + "objective/train/value_loss": 0.0018765622517094016, + "objective/train/value_max": -0.0028781890869140625, + "objective/train/value_min": -0.1380615234375, + "objective/train/value_reward_corr": 0.003196297278843253, + "objective/train/value_std": 0.00997161865234375, + "objective/train/weight_avg": 1.0012189149856567, + "objective/train/weighted_lm_loss": 5.377025604248047, + "objective/train/weights_max": 1.0138157606124878, + "objective/train/weights_min": 0.942187488079071, + "theoretical_loss": 4.096958268030786, + "tokens_seen": 332595200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009081861958266453, + "loss": 2.4686, + "theoretical_loss": 4.095749501625689, + "tokens_seen": 333447168 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": -0.018608082085847855, + "objective/train/docs_used": 201196, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.190551280975342, + "objective/train/original_loss": 5.190552234649658, + "objective/train/theoretical_loss": 4.09463722080479, + "objective/train/tokens_used": 354693600, + "objective/train/value_avg": -0.0228729248046875, + "objective/train/value_loss": 0.018549686297774315, + "objective/train/value_max": -0.0035800933837890625, + "objective/train/value_min": -0.1680908203125, + "objective/train/value_reward_corr": 0.39942058107910294, + "objective/train/value_std": 0.0150909423828125, + "objective/train/weight_avg": 0.9982302188873291, + "objective/train/weighted_lm_loss": 5.184675216674805, + "objective/train/weights_max": 1.0135992765426636, + "objective/train/weights_min": 0.910168468952179, + "theoretical_loss": 4.09463722080479, + "tokens_seen": 334233600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009078651685393258, + "loss": 2.4556, + "theoretical_loss": 4.094267204726426, + "tokens_seen": 334495744 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009075441412520064, + "loss": 2.4972, + "theoretical_loss": 4.092790843700574, + "tokens_seen": 335544320 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.013087308965623379, + "objective/train/docs_used": 202460, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.846424579620361, + "objective/train/original_loss": 4.846424102783203, + "objective/train/theoretical_loss": 4.092330691513631, + "objective/train/tokens_used": 356332000, + "objective/train/value_avg": -0.02099609375, + "objective/train/value_loss": 0.0017303975764662027, + "objective/train/value_max": -0.00327301025390625, + "objective/train/value_min": -0.12347412109375, + "objective/train/value_reward_corr": 0.12843921961867166, + "objective/train/value_std": 0.0124053955078125, + "objective/train/weight_avg": 1.0013173818588257, + "objective/train/weighted_lm_loss": 4.853082180023193, + "objective/train/weights_max": 1.0123634338378906, + "objective/train/weights_min": 0.9224476218223572, + "theoretical_loss": 4.092330691513631, + "tokens_seen": 335872000 + }, + { + "epoch": 0.1, + "learning_rate": 0.000907223113964687, + "loss": 2.4212, + "theoretical_loss": 4.091320376339368, + "tokens_seen": 336592896 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.014284283854067326, + "objective/train/docs_used": 203033, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.912319660186768, + "objective/train/original_loss": 4.912320613861084, + "objective/train/theoretical_loss": 4.0900385191913164, + "objective/train/tokens_used": 357970400, + "objective/train/value_avg": -0.0212554931640625, + "objective/train/value_loss": 0.0018556772265583277, + "objective/train/value_max": -0.0032100677490234375, + "objective/train/value_min": -0.1414794921875, + "objective/train/value_reward_corr": 0.1589866288372039, + "objective/train/value_std": 0.017364501953125, + "objective/train/weight_avg": 1.0014376640319824, + "objective/train/weighted_lm_loss": 4.9194536209106445, + "objective/train/weights_max": 1.0140727758407593, + "objective/train/weights_min": 0.9066551327705383, + "theoretical_loss": 4.0900385191913164, + "tokens_seen": 337510400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009069020866773677, + "loss": 2.45, + "theoretical_loss": 4.089855760864484, + "tokens_seen": 337641472 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009065810593900482, + "loss": 2.4582, + "theoretical_loss": 4.0883969559223186, + "tokens_seen": 338690048 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.01728048175573349, + "objective/train/docs_used": 204232, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.255428791046143, + "objective/train/original_loss": 5.255428791046143, + "objective/train/theoretical_loss": 4.087760545426889, + "objective/train/tokens_used": 359608800, + "objective/train/value_avg": -0.0269622802734375, + "objective/train/value_loss": 0.0017148378537967801, + "objective/train/value_max": -0.0033512115478515625, + "objective/train/value_min": -0.1588134765625, + "objective/train/value_reward_corr": 0.32808984258368934, + "objective/train/value_std": 0.02032470703125, + "objective/train/weight_avg": 1.0017366409301758, + "objective/train/weighted_lm_loss": 5.264470100402832, + "objective/train/weights_max": 1.014555811882019, + "objective/train/weights_min": 0.9651771187782288, + "theoretical_loss": 4.087760545426889, + "tokens_seen": 339148800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009062600321027288, + "loss": 2.4678, + "theoretical_loss": 4.086943920578378, + "tokens_seen": 339738624 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.009236485697329044, + "objective/train/docs_used": 205032, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.945983409881592, + "objective/train/original_loss": 4.945984363555908, + "objective/train/theoretical_loss": 4.085496614311752, + "objective/train/tokens_used": 361247200, + "objective/train/value_avg": -0.0157470703125, + "objective/train/value_loss": 0.0019442799966782331, + "objective/train/value_max": -0.002811431884765625, + "objective/train/value_min": -0.14794921875, + "objective/train/value_reward_corr": 0.08566763468040714, + "objective/train/value_std": 0.00994110107421875, + "objective/train/weight_avg": 1.0009331703186035, + "objective/train/weighted_lm_loss": 4.950783729553223, + "objective/train/weights_max": 1.014830470085144, + "objective/train/weights_min": 0.9329550266265869, + "theoretical_loss": 4.085496614311752, + "tokens_seen": 340787200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009059390048154094, + "loss": 2.4448, + "theoretical_loss": 4.085496614311752, + "tokens_seen": 340787200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00090561797752809, + "loss": 2.437, + "theoretical_loss": 4.084054997009675, + "tokens_seen": 341835776 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": -0.007180529180914164, + "objective/train/docs_used": 206109, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.637508392333984, + "objective/train/original_loss": 4.637508392333984, + "objective/train/theoretical_loss": 4.083246572388344, + "objective/train/tokens_used": 362885600, + "objective/train/value_avg": -0.0233917236328125, + "objective/train/value_loss": 0.010419834405183792, + "objective/train/value_max": -0.0033893585205078125, + "objective/train/value_min": -0.1776123046875, + "objective/train/value_reward_corr": 0.32073423837656934, + "objective/train/value_std": 0.0217132568359375, + "objective/train/weight_avg": 0.9993329048156738, + "objective/train/weighted_lm_loss": 4.635405540466309, + "objective/train/weights_max": 1.015971302986145, + "objective/train/weights_min": 0.9082985520362854, + "theoretical_loss": 4.083246572388344, + "tokens_seen": 342425600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009052969502407705, + "loss": 2.4376, + "theoretical_loss": 4.082619028962182, + "tokens_seen": 342884352 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009049759229534511, + "loss": 2.4366, + "theoretical_loss": 4.081188670856844, + "tokens_seen": 343932928 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.010915465652942657, + "objective/train/docs_used": 206760, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.59447717666626, + "objective/train/original_loss": 4.594477653503418, + "objective/train/theoretical_loss": 4.081010268600093, + "objective/train/tokens_used": 364524000, + "objective/train/value_avg": -0.019500732421875, + "objective/train/value_loss": 0.0030601948965340853, + "objective/train/value_max": -0.003749847412109375, + "objective/train/value_min": -0.141357421875, + "objective/train/value_reward_corr": 0.1296204415639424, + "objective/train/value_std": 0.01174163818359375, + "objective/train/weight_avg": 1.0011066198349, + "objective/train/weighted_lm_loss": 4.600259304046631, + "objective/train/weights_max": 1.0141730308532715, + "objective/train/weights_min": 0.9075483679771423, + "theoretical_loss": 4.081010268600093, + "tokens_seen": 344064000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009046548956661317, + "loss": 2.443, + "theoretical_loss": 4.079763883773593, + "tokens_seen": 344981504 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.00654487544670701, + "objective/train/docs_used": 207336, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.802588939666748, + "objective/train/original_loss": 4.802588939666748, + "objective/train/theoretical_loss": 4.0787875542426315, + "objective/train/tokens_used": 366162400, + "objective/train/value_avg": -0.019439697265625, + "objective/train/value_loss": 0.003040136070922017, + "objective/train/value_max": -0.003795623779296875, + "objective/train/value_min": -0.1478271484375, + "objective/train/value_reward_corr": 0.042000976820954876, + "objective/train/value_std": 0.01020050048828125, + "objective/train/weight_avg": 1.0006693601608276, + "objective/train/weighted_lm_loss": 4.8062968254089355, + "objective/train/weights_max": 1.012673258781433, + "objective/train/weights_min": 0.9263184070587158, + "theoretical_loss": 4.0787875542426315, + "tokens_seen": 345702400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009043338683788122, + "loss": 2.4308, + "theoretical_loss": 4.078344629179623, + "tokens_seen": 346030080 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009040128410914928, + "loss": 2.4317, + "theoretical_loss": 4.076930868924384, + "tokens_seen": 347078656 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.012921093963086605, + "objective/train/docs_used": 208796, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.005258560180664, + "objective/train/original_loss": 5.005258083343506, + "objective/train/theoretical_loss": 4.076578282916229, + "objective/train/tokens_used": 367800800, + "objective/train/value_avg": -0.019439697265625, + "objective/train/value_loss": 0.003958164248615503, + "objective/train/value_max": -0.0038242340087890625, + "objective/train/value_min": -0.1959228515625, + "objective/train/value_reward_corr": 0.041328495309840035, + "objective/train/value_std": 0.01114654541015625, + "objective/train/weight_avg": 1.0013114213943481, + "objective/train/weighted_lm_loss": 5.011721611022949, + "objective/train/weights_max": 1.0197200775146484, + "objective/train/weights_min": 0.9066020846366882, + "theoretical_loss": 4.076578282916229, + "tokens_seen": 347340800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009036918138041734, + "loss": 2.4262, + "theoretical_loss": 4.075522565234643, + "tokens_seen": 348127232 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": -0.05734134465456009, + "objective/train/docs_used": 209597, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.359332084655762, + "objective/train/original_loss": 5.3593316078186035, + "objective/train/theoretical_loss": 4.074382310479402, + "objective/train/tokens_used": 369439200, + "objective/train/value_avg": -0.0221710205078125, + "objective/train/value_loss": 0.0663055032491684, + "objective/train/value_max": -0.0036640167236328125, + "objective/train/value_min": -0.15478515625, + "objective/train/value_reward_corr": 0.0532339474674834, + "objective/train/value_std": 0.0156402587890625, + "objective/train/weight_avg": 0.9945873618125916, + "objective/train/weighted_lm_loss": 5.324788570404053, + "objective/train/weights_max": 1.0123234987258911, + "objective/train/weights_min": 0.9063119888305664, + "theoretical_loss": 4.074382310479402, + "tokens_seen": 348979200 + }, + { + "epoch": 0.11, + "learning_rate": 0.000903370786516854, + "loss": 2.3937, + "theoretical_loss": 4.074119680709633, + "tokens_seen": 349175808 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009030497592295346, + "loss": 2.4272, + "theoretical_loss": 4.072722178316271, + "tokens_seen": 350224384 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.012496023438870907, + "objective/train/docs_used": 210243, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.275186538696289, + "objective/train/original_loss": 4.275186538696289, + "objective/train/theoretical_loss": 4.072199495003675, + "objective/train/tokens_used": 371077600, + "objective/train/value_avg": -0.020965576171875, + "objective/train/value_loss": 0.0027709128335118294, + "objective/train/value_max": -0.0034694671630859375, + "objective/train/value_min": -0.1246337890625, + "objective/train/value_reward_corr": 0.030463599810835306, + "objective/train/value_std": 0.012054443359375, + "objective/train/weight_avg": 1.00126314163208, + "objective/train/weighted_lm_loss": 4.2804107666015625, + "objective/train/weights_max": 1.0124585628509521, + "objective/train/weights_min": 0.9136751890182495, + "theoretical_loss": 4.072199495003675, + "tokens_seen": 350617600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009027287319422151, + "loss": 2.345, + "theoretical_loss": 4.071330021384458, + "tokens_seen": 351272960 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.00908074900507927, + "objective/train/docs_used": 211560, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.307843208312988, + "objective/train/original_loss": 4.307843208312988, + "objective/train/theoretical_loss": 4.070029696729449, + "objective/train/tokens_used": 372716000, + "objective/train/value_avg": -0.021575927734375, + "objective/train/value_loss": 0.004773054271936417, + "objective/train/value_max": -0.003765106201171875, + "objective/train/value_min": -0.17626953125, + "objective/train/value_reward_corr": 0.049259353374944775, + "objective/train/value_std": 0.0121917724609375, + "objective/train/weight_avg": 1.0009315013885498, + "objective/train/weighted_lm_loss": 4.3120036125183105, + "objective/train/weights_max": 1.0177159309387207, + "objective/train/weights_min": 0.9075234532356262, + "theoretical_loss": 4.070029696729449, + "tokens_seen": 352256000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009024077046548957, + "loss": 2.3835, + "theoretical_loss": 4.069943173602451, + "tokens_seen": 352321536 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009020866773675762, + "loss": 2.4154, + "theoretical_loss": 4.068561599012304, + "tokens_seen": 353370112 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": -0.04034550115466118, + "objective/train/docs_used": 212189, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.653895378112793, + "objective/train/original_loss": 4.653894901275635, + "objective/train/theoretical_loss": 4.0678727780229575, + "objective/train/tokens_used": 374354400, + "objective/train/value_avg": -0.0311279296875, + "objective/train/value_loss": 0.03962668776512146, + "objective/train/value_max": -0.003376007080078125, + "objective/train/value_min": -0.353759765625, + "objective/train/value_reward_corr": 0.9365502444875473, + "objective/train/value_std": 0.04595947265625, + "objective/train/weight_avg": 0.9961587190628052, + "objective/train/weighted_lm_loss": 4.645400524139404, + "objective/train/weights_max": 1.035946011543274, + "objective/train/weights_min": 0.9096989631652832, + "theoretical_loss": 4.0678727780229575, + "tokens_seen": 353894400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009017656500802568, + "loss": 2.3631, + "theoretical_loss": 4.0671852620053865, + "tokens_seen": 354418688 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009014446227929374, + "loss": 2.3879, + "theoretical_loss": 4.0658141273179655, + "tokens_seen": 355467264 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.014567661099135876, + "objective/train/docs_used": 213521, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.5442728996276855, + "objective/train/original_loss": 4.544273376464844, + "objective/train/theoretical_loss": 4.065728603334263, + "objective/train/tokens_used": 375992800, + "objective/train/value_avg": -0.0204010009765625, + "objective/train/value_loss": 0.0009006726904772222, + "objective/train/value_max": -0.0034160614013671875, + "objective/train/value_min": -0.373291015625, + "objective/train/value_reward_corr": 0.04817760063835769, + "objective/train/value_std": 0.0154876708984375, + "objective/train/weight_avg": 1.0014612674713135, + "objective/train/weighted_lm_loss": 4.5504631996154785, + "objective/train/weights_max": 1.0379433631896973, + "objective/train/weights_min": 0.9698310494422913, + "theoretical_loss": 4.065728603334263, + "tokens_seen": 355532800 + }, + { + "epoch": 0.11, + "learning_rate": 0.000901123595505618, + "loss": 2.3648, + "theoretical_loss": 4.0644481600268625, + "tokens_seen": 356515840 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.01270924974232912, + "objective/train/docs_used": 214188, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.7557692527771, + "objective/train/original_loss": 4.755769729614258, + "objective/train/theoretical_loss": 4.06359703915628, + "objective/train/tokens_used": 377631200, + "objective/train/value_avg": -0.019287109375, + "objective/train/value_loss": 0.0009729330195114017, + "objective/train/value_max": -0.0034160614013671875, + "objective/train/value_min": -0.1513671875, + "objective/train/value_reward_corr": 0.0635340902209652, + "objective/train/value_std": 0.01210784912109375, + "objective/train/weight_avg": 1.0012757778167725, + "objective/train/weighted_lm_loss": 4.762186050415039, + "objective/train/weights_max": 1.0151866674423218, + "objective/train/weights_min": 0.9582651853561401, + "theoretical_loss": 4.06359703915628, + "tokens_seen": 357171200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009008025682182985, + "loss": 2.3483, + "theoretical_loss": 4.06308732554517, + "tokens_seen": 357564416 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009004815409309791, + "loss": 2.3816, + "theoretical_loss": 4.061731589618044, + "tokens_seen": 358612992 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0018573952838778496, + "objective/train/docs_used": 215284, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.05596399307251, + "objective/train/original_loss": 4.05596399307251, + "objective/train/theoretical_loss": 4.061477953984786, + "objective/train/tokens_used": 379269600, + "objective/train/value_avg": -0.0220794677734375, + "objective/train/value_loss": 0.004748697858303785, + "objective/train/value_max": -0.0033111572265625, + "objective/train/value_min": -0.241455078125, + "objective/train/value_reward_corr": 0.03269806301170937, + "objective/train/value_std": 0.01739501953125, + "objective/train/weight_avg": 1.0002092123031616, + "objective/train/weighted_lm_loss": 4.056782245635986, + "objective/train/weights_max": 1.0226850509643555, + "objective/train/weights_min": 0.9327558279037476, + "theoretical_loss": 4.061477953984786, + "tokens_seen": 358809600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009001605136436597, + "loss": 2.3676, + "theoretical_loss": 4.060380918318552, + "tokens_seen": 359661568 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.006730925291776657, + "objective/train/docs_used": 215704, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.920666694641113, + "objective/train/original_loss": 4.9206671714782715, + "objective/train/theoretical_loss": 4.05937121827939, + "objective/train/tokens_used": 380908000, + "objective/train/value_avg": -0.0178680419921875, + "objective/train/value_loss": 0.0042742714285850525, + "objective/train/value_max": -0.0030040740966796875, + "objective/train/value_min": -0.291748046875, + "objective/train/value_reward_corr": 0.06680901824100542, + "objective/train/value_std": 0.010955810546875, + "objective/train/weight_avg": 1.0006940364837646, + "objective/train/weighted_lm_loss": 4.923538684844971, + "objective/train/weights_max": 1.029527187347412, + "objective/train/weights_min": 0.9152780175209045, + "theoretical_loss": 4.05937121827939, + "tokens_seen": 360448000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008998394863563402, + "loss": 2.4098, + "theoretical_loss": 4.059035278043591, + "tokens_seen": 360710144 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008995184590690208, + "loss": 2.3741, + "theoretical_loss": 4.057694635509866, + "tokens_seen": 361758720 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0060117351822555065, + "objective/train/docs_used": 216313, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.052402973175049, + "objective/train/original_loss": 5.052403450012207, + "objective/train/theoretical_loss": 4.057276704425439, + "objective/train/tokens_used": 382546400, + "objective/train/value_avg": -0.0203857421875, + "objective/train/value_loss": 0.0028666462749242783, + "objective/train/value_max": -0.00307464599609375, + "objective/train/value_min": -0.16845703125, + "objective/train/value_reward_corr": 0.2608936190951315, + "objective/train/value_std": 0.014892578125, + "objective/train/weight_avg": 1.0006153583526611, + "objective/train/weighted_lm_loss": 5.05720853805542, + "objective/train/weights_max": 1.0145622491836548, + "objective/train/weights_min": 0.9407134056091309, + "theoretical_loss": 4.057276704425439, + "tokens_seen": 362086400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008991974317817014, + "loss": 2.3823, + "theoretical_loss": 4.056358957749928, + "tokens_seen": 362807296 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": -0.029890144243836403, + "objective/train/docs_used": 217479, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.955979347229004, + "objective/train/original_loss": 4.955979824066162, + "objective/train/theoretical_loss": 4.055194286696828, + "objective/train/tokens_used": 384184800, + "objective/train/value_avg": -0.0445556640625, + "objective/train/value_loss": 0.02254043146967888, + "objective/train/value_max": -0.00359344482421875, + "objective/train/value_min": -0.5224609375, + "objective/train/value_reward_corr": 0.9563276584125495, + "objective/train/value_std": 0.09771728515625, + "objective/train/weight_avg": 0.9971216917037964, + "objective/train/weighted_lm_loss": 4.949211120605469, + "objective/train/weights_max": 1.0131617784500122, + "objective/train/weights_min": 0.9183029532432556, + "theoretical_loss": 4.055194286696828, + "tokens_seen": 363724800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008988764044943821, + "loss": 2.4148, + "theoretical_loss": 4.055028212108276, + "tokens_seen": 363855872 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008985553772070626, + "loss": 2.4068, + "theoretical_loss": 4.053702366237517, + "tokens_seen": 364904448 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": -0.005251666065305471, + "objective/train/docs_used": 218231, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.435400485992432, + "objective/train/original_loss": 4.43540096282959, + "objective/train/theoretical_loss": 4.053123841219701, + "objective/train/tokens_used": 385823200, + "objective/train/value_avg": -0.0251007080078125, + "objective/train/value_loss": 0.010179867967963219, + "objective/train/value_max": -0.00392913818359375, + "objective/train/value_min": -0.18603515625, + "objective/train/value_reward_corr": 0.12147941869109923, + "objective/train/value_std": 0.0162811279296875, + "objective/train/weight_avg": 0.999524712562561, + "objective/train/weighted_lm_loss": 4.436176776885986, + "objective/train/weights_max": 1.0178827047348022, + "objective/train/weights_min": 0.9114702939987183, + "theoretical_loss": 4.053123841219701, + "tokens_seen": 365363200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008982343499197432, + "loss": 2.4461, + "theoretical_loss": 4.05238138809458, + "tokens_seen": 365953024 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.010428987443447113, + "objective/train/docs_used": 219467, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.010043144226074, + "objective/train/original_loss": 5.010043621063232, + "objective/train/theoretical_loss": 4.051065245936996, + "objective/train/tokens_used": 387461600, + "objective/train/value_avg": -0.019378662109375, + "objective/train/value_loss": 0.0015359981916844845, + "objective/train/value_max": -0.0027904510498046875, + "objective/train/value_min": -0.18701171875, + "objective/train/value_reward_corr": -0.004101310767552348, + "objective/train/value_std": 0.01276397705078125, + "objective/train/weight_avg": 1.001050591468811, + "objective/train/weighted_lm_loss": 5.015827178955078, + "objective/train/weights_max": 1.0187898874282837, + "objective/train/weights_min": 0.9553699493408203, + "theoretical_loss": 4.051065245936996, + "tokens_seen": 367001600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008979133226324238, + "loss": 2.4025, + "theoretical_loss": 4.051065245936996, + "tokens_seen": 367001600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008975922953451043, + "loss": 2.424, + "theoretical_loss": 4.049753908319223, + "tokens_seen": 368050176 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": -0.004717106930911541, + "objective/train/docs_used": 220098, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.83786153793335, + "objective/train/original_loss": 4.83786153793335, + "objective/train/theoretical_loss": 4.04901838057383, + "objective/train/tokens_used": 389100000, + "objective/train/value_avg": -0.0197906494140625, + "objective/train/value_loss": 0.012928929179906845, + "objective/train/value_max": -0.0037212371826171875, + "objective/train/value_min": -0.316650390625, + "objective/train/value_reward_corr": 0.026898424297488847, + "objective/train/value_std": 0.011688232421875, + "objective/train/weight_avg": 0.9995912313461304, + "objective/train/weighted_lm_loss": 4.834844589233398, + "objective/train/weights_max": 1.0321063995361328, + "objective/train/weights_min": 0.9062169194221497, + "theoretical_loss": 4.04901838057383, + "tokens_seen": 368640000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008972712680577849, + "loss": 2.4302, + "theoretical_loss": 4.048447344089038, + "tokens_seen": 369098752 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008969502407704655, + "loss": 2.4266, + "theoretical_loss": 4.04714552238397, + "tokens_seen": 370147328 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.007788795046508312, + "objective/train/docs_used": 221122, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.9446635246276855, + "objective/train/original_loss": 4.944662570953369, + "objective/train/theoretical_loss": 4.0469831266037, + "objective/train/tokens_used": 390738400, + "objective/train/value_avg": -0.0193328857421875, + "objective/train/value_loss": 0.0044091264717280865, + "objective/train/value_max": -0.00307464599609375, + "objective/train/value_min": -0.1934814453125, + "objective/train/value_reward_corr": 0.11424058942700718, + "objective/train/value_std": 0.0123291015625, + "objective/train/weight_avg": 1.0008004903793335, + "objective/train/weighted_lm_loss": 4.9492692947387695, + "objective/train/weights_max": 1.0194573402404785, + "objective/train/weights_min": 0.9137376546859741, + "theoretical_loss": 4.0469831266037, + "tokens_seen": 370278400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008966292134831461, + "loss": 2.4277, + "theoretical_loss": 4.045848412627802, + "tokens_seen": 371195904 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": -0.01830151118338108, + "objective/train/docs_used": 221664, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.605759143829346, + "objective/train/original_loss": 4.605757713317871, + "objective/train/theoretical_loss": 4.04495936721546, + "objective/train/tokens_used": 392376800, + "objective/train/value_avg": -0.020263671875, + "objective/train/value_loss": 0.021253131330013275, + "objective/train/value_max": -0.0024623870849609375, + "objective/train/value_min": -0.34765625, + "objective/train/value_reward_corr": 0.19169245565410545, + "objective/train/value_std": 0.01535797119140625, + "objective/train/weight_avg": 0.9982733726501465, + "objective/train/weighted_lm_loss": 4.597793102264404, + "objective/train/weights_max": 1.0353058576583862, + "objective/train/weights_min": 0.9071756601333618, + "theoretical_loss": 4.04495936721546, + "tokens_seen": 371916800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008963081861958266, + "loss": 2.4213, + "theoretical_loss": 4.044555984527107, + "tokens_seen": 372244480 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008959871589085072, + "loss": 2.4035, + "theoretical_loss": 4.04326820806785, + "tokens_seen": 373293056 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.015137548558413982, + "objective/train/docs_used": 222906, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.793362140655518, + "objective/train/original_loss": 4.793362617492676, + "objective/train/theoretical_loss": 4.042946987281072, + "objective/train/tokens_used": 394015200, + "objective/train/value_avg": -0.0205841064453125, + "objective/train/value_loss": 0.0009655122994445264, + "objective/train/value_max": -0.0033245086669921875, + "objective/train/value_min": -0.171875, + "objective/train/value_reward_corr": 0.10367896468896269, + "objective/train/value_std": 0.0125885009765625, + "objective/train/weight_avg": 1.0015184879302979, + "objective/train/weighted_lm_loss": 4.801696300506592, + "objective/train/weights_max": 1.0172611474990845, + "objective/train/weights_min": 0.9350336790084839, + "theoretical_loss": 4.042946987281072, + "tokens_seen": 373555200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008956661316211878, + "loss": 2.4051, + "theoretical_loss": 4.041985053512038, + "tokens_seen": 374341632 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.01740625686943531, + "objective/train/docs_used": 223164, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.080158233642578, + "objective/train/original_loss": 5.08015775680542, + "objective/train/theoretical_loss": 4.040945873324096, + "objective/train/tokens_used": 395653600, + "objective/train/value_avg": -0.022247314453125, + "objective/train/value_loss": 0.0013966599944978952, + "objective/train/value_max": -0.003063201904296875, + "objective/train/value_min": -0.2509765625, + "objective/train/value_reward_corr": 0.0040506934093303155, + "objective/train/value_std": 0.01262664794921875, + "objective/train/weight_avg": 1.001747488975525, + "objective/train/weighted_lm_loss": 5.0896196365356445, + "objective/train/weights_max": 1.0253530740737915, + "objective/train/weights_min": 0.9103367924690247, + "theoretical_loss": 4.040945873324096, + "tokens_seen": 375193600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008953451043338684, + "loss": 2.3654, + "theoretical_loss": 4.040706491394406, + "tokens_seen": 375390208 + }, + { + "epoch": 0.11, + "learning_rate": 0.000895024077046549, + "loss": 2.3908, + "theoretical_loss": 4.0394324925191745, + "tokens_seen": 376438784 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.016474522650241852, + "objective/train/docs_used": 224842, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.552448749542236, + "objective/train/original_loss": 4.552449703216553, + "objective/train/theoretical_loss": 4.038955913488913, + "objective/train/tokens_used": 397292000, + "objective/train/value_avg": -0.020477294921875, + "objective/train/value_loss": 0.0008067811140790582, + "objective/train/value_max": -0.0034427642822265625, + "objective/train/value_min": -0.36328125, + "objective/train/value_reward_corr": -0.015538778456769759, + "objective/train/value_std": 0.01934814453125, + "objective/train/weight_avg": 1.0016515254974365, + "objective/train/weighted_lm_loss": 4.559835910797119, + "objective/train/weights_max": 1.0368744134902954, + "objective/train/weights_min": 0.9893465638160706, + "theoretical_loss": 4.038955913488913, + "tokens_seen": 376832000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008947030497592296, + "loss": 2.3934, + "theoretical_loss": 4.038163027956834, + "tokens_seen": 377487360 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0053961449302732944, + "objective/train/docs_used": 225570, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.7815351486206055, + "objective/train/original_loss": 4.781534671783447, + "objective/train/theoretical_loss": 4.036976997510639, + "objective/train/tokens_used": 398930400, + "objective/train/value_avg": -0.01776123046875, + "objective/train/value_loss": 0.002118194242939353, + "objective/train/value_max": -0.0038242340087890625, + "objective/train/value_min": -0.1884765625, + "objective/train/value_reward_corr": -0.015675866774934037, + "objective/train/value_std": 0.0111541748046875, + "objective/train/weight_avg": 1.0005500316619873, + "objective/train/weighted_lm_loss": 4.784369468688965, + "objective/train/weights_max": 1.0147067308425903, + "objective/train/weights_min": 0.9404784440994263, + "theoretical_loss": 4.036976997510639, + "tokens_seen": 378470400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008943820224719102, + "loss": 2.3958, + "theoretical_loss": 4.036898069040989, + "tokens_seen": 378535936 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008940609951845907, + "loss": 2.396, + "theoretical_loss": 4.03563758736524, + "tokens_seen": 379584512 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.010206539183855057, + "objective/train/docs_used": 226798, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.672510623931885, + "objective/train/original_loss": 4.672510623931885, + "objective/train/theoretical_loss": 4.035009016685741, + "objective/train/tokens_used": 400568800, + "objective/train/value_avg": -0.021636962890625, + "objective/train/value_loss": 0.0038914119359105825, + "objective/train/value_max": -0.00359344482421875, + "objective/train/value_min": -0.1502685546875, + "objective/train/value_reward_corr": 0.091772616459138, + "objective/train/value_std": 0.011993408203125, + "objective/train/weight_avg": 1.0010398626327515, + "objective/train/weighted_lm_loss": 4.6776814460754395, + "objective/train/weights_max": 1.0150790214538574, + "objective/train/weights_min": 0.90623539686203, + "theoretical_loss": 4.035009016685741, + "tokens_seen": 380108800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008937399678972713, + "loss": 2.4101, + "theoretical_loss": 4.034381554780124, + "tokens_seen": 380633088 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008934189406099519, + "loss": 2.3939, + "theoretical_loss": 4.033129943390076, + "tokens_seen": 381681664 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": -0.000695896684192121, + "objective/train/docs_used": 227456, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.747595310211182, + "objective/train/original_loss": 4.747595310211182, + "objective/train/theoretical_loss": 4.033051863843309, + "objective/train/tokens_used": 402207200, + "objective/train/value_avg": -0.0236968994140625, + "objective/train/value_loss": 0.006727428641170263, + "objective/train/value_max": -0.003551483154296875, + "objective/train/value_min": -0.1324462890625, + "objective/train/value_reward_corr": 0.2329204653066495, + "objective/train/value_std": 0.016448974609375, + "objective/train/weight_avg": 0.9999632239341736, + "objective/train/weighted_lm_loss": 4.750112056732178, + "objective/train/weights_max": 1.0105714797973633, + "objective/train/weights_min": 0.9090884923934937, + "theoretical_loss": 4.033051863843309, + "tokens_seen": 381747200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008930979133226325, + "loss": 2.3869, + "theoretical_loss": 4.031882725550463, + "tokens_seen": 382730240 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.01200796291232109, + "objective/train/docs_used": 228876, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.790316104888916, + "objective/train/original_loss": 4.790315628051758, + "objective/train/theoretical_loss": 4.031105433316977, + "objective/train/tokens_used": 403845600, + "objective/train/value_avg": -0.0182952880859375, + "objective/train/value_loss": 0.0010693914955481887, + "objective/train/value_max": -0.0030155181884765625, + "objective/train/value_min": -0.11083984375, + "objective/train/value_reward_corr": 0.05505957946940863, + "objective/train/value_std": 0.01082611083984375, + "objective/train/weight_avg": 1.0012061595916748, + "objective/train/weighted_lm_loss": 4.796600818634033, + "objective/train/weights_max": 1.0108224153518677, + "objective/train/weights_min": 0.924217700958252, + "theoretical_loss": 4.031105433316977, + "tokens_seen": 383385600 + }, + { + "epoch": 0.12, + "learning_rate": 0.000892776886035313, + "loss": 2.4054, + "theoretical_loss": 4.030639873864638, + "tokens_seen": 383778816 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008924558587479936, + "loss": 2.3835, + "theoretical_loss": 4.029401361181049, + "tokens_seen": 384827392 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": -0.005833991337567568, + "objective/train/docs_used": 229454, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.521109580993652, + "objective/train/original_loss": 4.521109580993652, + "objective/train/theoretical_loss": 4.029169620917484, + "objective/train/tokens_used": 405484000, + "objective/train/value_avg": -0.0238189697265625, + "objective/train/value_loss": 0.008846106007695198, + "objective/train/value_max": -0.00279998779296875, + "objective/train/value_min": -0.20263671875, + "objective/train/value_reward_corr": 0.19299608308347765, + "objective/train/value_std": 0.0172882080078125, + "objective/train/weight_avg": 0.999459981918335, + "objective/train/weighted_lm_loss": 4.522342681884766, + "objective/train/weights_max": 1.0203156471252441, + "objective/train/weights_min": 0.9072430729866028, + "theoretical_loss": 4.029169620917484, + "tokens_seen": 385024000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008921348314606742, + "loss": 2.3853, + "theoretical_loss": 4.028167160590383, + "tokens_seen": 385875968 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.008176982402801514, + "objective/train/docs_used": 230121, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.8355631828308105, + "objective/train/original_loss": 4.835562705993652, + "objective/train/theoretical_loss": 4.027244323905839, + "objective/train/tokens_used": 407122400, + "objective/train/value_avg": -0.0218353271484375, + "objective/train/value_loss": 0.004351518582552671, + "objective/train/value_max": -0.004329681396484375, + "objective/train/value_min": -0.1937255859375, + "objective/train/value_reward_corr": 0.10103069833006119, + "objective/train/value_std": 0.0146484375, + "objective/train/weight_avg": 1.0008389949798584, + "objective/train/weighted_lm_loss": 4.840237140655518, + "objective/train/weights_max": 1.01566743850708, + "objective/train/weights_min": 0.9093966484069824, + "theoretical_loss": 4.027244323905839, + "tokens_seen": 386662400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008918138041733547, + "loss": 2.3938, + "theoretical_loss": 4.026937245422756, + "tokens_seen": 386924544 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008914927768860353, + "loss": 2.3636, + "theoretical_loss": 4.025711589244939, + "tokens_seen": 387973120 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.009663601405918598, + "objective/train/docs_used": 231531, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.384364128112793, + "objective/train/original_loss": 5.384363651275635, + "objective/train/theoretical_loss": 4.025329440967096, + "objective/train/tokens_used": 408760800, + "objective/train/value_avg": -0.0251922607421875, + "objective/train/value_loss": 0.0028158528730273247, + "objective/train/value_max": -0.0031604766845703125, + "objective/train/value_min": -0.175537109375, + "objective/train/value_reward_corr": 0.3076409383355041, + "objective/train/value_std": 0.023895263671875, + "objective/train/weight_avg": 1.0009803771972656, + "objective/train/weighted_lm_loss": 5.388777732849121, + "objective/train/weights_max": 1.016689658164978, + "objective/train/weights_min": 0.94039386510849, + "theoretical_loss": 4.025329440967096, + "tokens_seen": 388300800 + }, + { + "epoch": 0.12, + "learning_rate": 0.000891171749598716, + "loss": 2.3763, + "theoretical_loss": 4.024490165857627, + "tokens_seen": 389021696 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": -0.004226756282150745, + "objective/train/docs_used": 232298, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.373773097991943, + "objective/train/original_loss": 4.373773574829102, + "objective/train/theoretical_loss": 4.0234248721847035, + "objective/train/tokens_used": 410399200, + "objective/train/value_avg": -0.020599365234375, + "objective/train/value_loss": 0.00433869706466794, + "objective/train/value_max": -0.0029239654541015625, + "objective/train/value_min": -0.1260986328125, + "objective/train/value_reward_corr": 0.030611816353551702, + "objective/train/value_std": 0.011474609375, + "objective/train/weight_avg": 0.9995988011360168, + "objective/train/weighted_lm_loss": 4.372057914733887, + "objective/train/weights_max": 1.0126045942306519, + "objective/train/weights_min": 0.9681634902954102, + "theoretical_loss": 4.0234248721847035, + "tokens_seen": 389939200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008908507223113966, + "loss": 2.346, + "theoretical_loss": 4.023272949292743, + "tokens_seen": 390070272 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008905296950240771, + "loss": 2.3709, + "theoretical_loss": 4.022059913810782, + "tokens_seen": 391118848 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.021481268107891083, + "objective/train/docs_used": 233436, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.880739212036133, + "objective/train/original_loss": 4.880739212036133, + "objective/train/theoretical_loss": 4.021530519015424, + "objective/train/tokens_used": 412037600, + "objective/train/value_avg": -0.02734375, + "objective/train/value_loss": 0.0013162943068891764, + "objective/train/value_max": -0.003040313720703125, + "objective/train/value_min": -0.166015625, + "objective/train/value_reward_corr": 0.06633368819794556, + "objective/train/value_std": 0.026763916015625, + "objective/train/weight_avg": 1.00215482711792, + "objective/train/weighted_lm_loss": 4.892077922821045, + "objective/train/weights_max": 1.0166507959365845, + "objective/train/weights_min": 0.9767299890518188, + "theoretical_loss": 4.021530519015424, + "tokens_seen": 391577600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008902086677367577, + "loss": 2.3352, + "theoretical_loss": 4.020851033898196, + "tokens_seen": 392167424 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.02440703846514225, + "objective/train/docs_used": 234056, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.95131778717041, + "objective/train/original_loss": 4.95131778717041, + "objective/train/theoretical_loss": 4.019646284264807, + "objective/train/tokens_used": 413676000, + "objective/train/value_avg": -0.0341796875, + "objective/train/value_loss": 0.01034585665911436, + "objective/train/value_max": -0.003765106201171875, + "objective/train/value_min": -0.595703125, + "objective/train/value_reward_corr": -0.02274732827592528, + "objective/train/value_std": 0.083984375, + "objective/train/weight_avg": 1.0024927854537964, + "objective/train/weighted_lm_loss": 4.959307670593262, + "objective/train/weights_max": 1.061283826828003, + "objective/train/weights_min": 0.9259368181228638, + "theoretical_loss": 4.019646284264807, + "tokens_seen": 393216000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008898876404494383, + "loss": 2.3669, + "theoretical_loss": 4.019646284264807, + "tokens_seen": 393216000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008895666131621188, + "loss": 2.3624, + "theoretical_loss": 4.01844563984127, + "tokens_seen": 394264576 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.015339464880526066, + "objective/train/docs_used": 235431, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.333748817443848, + "objective/train/original_loss": 4.3337483406066895, + "objective/train/theoretical_loss": 4.017772072063201, + "objective/train/tokens_used": 415314400, + "objective/train/value_avg": -0.025115966796875, + "objective/train/value_loss": 0.004243725910782814, + "objective/train/value_max": -0.0031719207763671875, + "objective/train/value_min": -0.291259765625, + "objective/train/value_reward_corr": 0.02634579914711749, + "objective/train/value_std": 0.0260162353515625, + "objective/train/weight_avg": 1.0015548467636108, + "objective/train/weighted_lm_loss": 4.3390350341796875, + "objective/train/weights_max": 1.0294570922851562, + "objective/train/weights_min": 0.9218251705169678, + "theoretical_loss": 4.017772072063201, + "tokens_seen": 394854400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008892455858747994, + "loss": 2.3195, + "theoretical_loss": 4.0172490757765535, + "tokens_seen": 395313152 + }, + { + "epoch": 0.12, + "learning_rate": 0.00088892455858748, + "loss": 2.3279, + "theoretical_loss": 4.016056567435475, + "tokens_seen": 396361728 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.01985330320894718, + "objective/train/docs_used": 235840, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.2462615966796875, + "objective/train/original_loss": 5.246262073516846, + "objective/train/theoretical_loss": 4.0159077878422815, + "objective/train/tokens_used": 416952800, + "objective/train/value_avg": -0.0233001708984375, + "objective/train/value_loss": 0.001148558920249343, + "objective/train/value_max": -0.003063201904296875, + "objective/train/value_min": -0.316650390625, + "objective/train/value_reward_corr": 0.05249816861736054, + "objective/train/value_std": 0.024658203125, + "objective/train/weight_avg": 1.0019910335540771, + "objective/train/weighted_lm_loss": 5.256429672241211, + "objective/train/weights_max": 1.0316417217254639, + "objective/train/weights_min": 0.9773336052894592, + "theoretical_loss": 4.0159077878422815, + "tokens_seen": 396492800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008886035313001606, + "loss": 2.3137, + "theoretical_loss": 4.014868090396256, + "tokens_seen": 397410304 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.016719447448849678, + "objective/train/docs_used": 236468, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.510963439941406, + "objective/train/original_loss": 4.5109639167785645, + "objective/train/theoretical_loss": 4.014053338312108, + "objective/train/tokens_used": 418591200, + "objective/train/value_avg": -0.0248260498046875, + "objective/train/value_loss": 0.0021832494530826807, + "objective/train/value_max": -0.00359344482421875, + "objective/train/value_min": -0.158203125, + "objective/train/value_reward_corr": 0.02853634105510132, + "objective/train/value_std": 0.01555633544921875, + "objective/train/weight_avg": 1.0016827583312988, + "objective/train/weighted_lm_loss": 4.517514228820801, + "objective/train/weights_max": 1.0150771141052246, + "objective/train/weights_min": 0.9344548583030701, + "theoretical_loss": 4.014053338312108, + "tokens_seen": 398131200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008882825040128411, + "loss": 2.3529, + "theoretical_loss": 4.013683620448113, + "tokens_seen": 398458880 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008879614767255216, + "loss": 2.3453, + "theoretical_loss": 4.0125031335888925, + "tokens_seen": 399507456 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.013258026912808418, + "objective/train/docs_used": 237759, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.007730484008789, + "objective/train/original_loss": 5.0077314376831055, + "objective/train/theoretical_loss": 4.0122086314386545, + "objective/train/tokens_used": 420229600, + "objective/train/value_avg": -0.0247650146484375, + "objective/train/value_loss": 0.0009518354199826717, + "objective/train/value_max": -0.0034027099609375, + "objective/train/value_min": -0.2135009765625, + "objective/train/value_reward_corr": 0.47026247162175927, + "objective/train/value_std": 0.0233917236328125, + "objective/train/weight_avg": 1.0013306140899658, + "objective/train/weighted_lm_loss": 5.015152454376221, + "objective/train/weights_max": 1.0161027908325195, + "objective/train/weights_min": 0.9649937748908997, + "theoretical_loss": 4.0122086314386545, + "tokens_seen": 399769600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008876404494382022, + "loss": 2.3457, + "theoretical_loss": 4.0113266060227275, + "tokens_seen": 400556032 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": -0.03030279278755188, + "objective/train/docs_used": 238664, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.605350494384766, + "objective/train/original_loss": 4.605350017547607, + "objective/train/theoretical_loss": 4.010373576421845, + "objective/train/tokens_used": 421868000, + "objective/train/value_avg": -0.053375244140625, + "objective/train/value_loss": 0.01784617453813553, + "objective/train/value_max": -0.0024814605712890625, + "objective/train/value_min": -0.630859375, + "objective/train/value_reward_corr": 0.944390996355766, + "objective/train/value_std": 0.126953125, + "objective/train/weight_avg": 0.9970576167106628, + "objective/train/weighted_lm_loss": 4.598946571350098, + "objective/train/weights_max": 1.0148859024047852, + "objective/train/weights_min": 0.9088276028633118, + "theoretical_loss": 4.010373576421845, + "tokens_seen": 401408000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008873194221508828, + "loss": 2.3328, + "theoretical_loss": 4.010154014157727, + "tokens_seen": 401604608 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008869983948635633, + "loss": 2.331, + "theoretical_loss": 4.008985334603709, + "tokens_seen": 402653184 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.008215812034904957, + "objective/train/docs_used": 240054, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.316725254058838, + "objective/train/original_loss": 4.316725730895996, + "objective/train/theoretical_loss": 4.00854808367405, + "objective/train/tokens_used": 423506400, + "objective/train/value_avg": -0.0189208984375, + "objective/train/value_loss": 0.0016306565375998616, + "objective/train/value_max": -0.003185272216796875, + "objective/train/value_min": -0.17236328125, + "objective/train/value_reward_corr": 0.13183992272163017, + "objective/train/value_std": 0.01296234130859375, + "objective/train/weight_avg": 1.0008296966552734, + "objective/train/weighted_lm_loss": 4.32072639465332, + "objective/train/weights_max": 1.0137351751327515, + "objective/train/weights_min": 0.9180203676223755, + "theoretical_loss": 4.00854808367405, + "tokens_seen": 403046400 + }, + { + "epoch": 0.12, + "learning_rate": 0.000886677367576244, + "loss": 2.3184, + "theoretical_loss": 4.007820544169944, + "tokens_seen": 403701760 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.013373641297221184, + "objective/train/docs_used": 240788, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.72723388671875, + "objective/train/original_loss": 4.72723388671875, + "objective/train/theoretical_loss": 4.006732064799048, + "objective/train/tokens_used": 425144800, + "objective/train/value_avg": -0.0255126953125, + "objective/train/value_loss": 0.006565555930137634, + "objective/train/value_max": -0.0035381317138671875, + "objective/train/value_min": -0.259033203125, + "objective/train/value_reward_corr": 0.020116199793278708, + "objective/train/value_std": 0.0233612060546875, + "objective/train/weight_avg": 1.0013694763183594, + "objective/train/weighted_lm_loss": 4.733263969421387, + "objective/train/weights_max": 1.026158094406128, + "objective/train/weights_min": 0.9096135497093201, + "theoretical_loss": 4.006732064799048, + "tokens_seen": 404684800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008863563402889246, + "loss": 2.3377, + "theoretical_loss": 4.006659619862954, + "tokens_seen": 404750336 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008860353130016051, + "loss": 2.3381, + "theoretical_loss": 4.0055025388843175, + "tokens_seen": 405798912 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.00485390005633235, + "objective/train/docs_used": 241796, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.366023540496826, + "objective/train/original_loss": 4.366023540496826, + "objective/train/theoretical_loss": 4.004925432571433, + "objective/train/tokens_used": 426783200, + "objective/train/value_avg": -0.0219879150390625, + "objective/train/value_loss": 0.0034109752159565687, + "objective/train/value_max": -0.0028667449951171875, + "objective/train/value_min": -0.1282958984375, + "objective/train/value_reward_corr": 0.013348782976167626, + "objective/train/value_std": 0.01343536376953125, + "objective/train/weight_avg": 1.0005022287368774, + "objective/train/weighted_lm_loss": 4.36818790435791, + "objective/train/weights_max": 1.0128378868103027, + "objective/train/weights_min": 0.9108454585075378, + "theoretical_loss": 4.004925432571433, + "tokens_seen": 406323200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008857142857142857, + "loss": 2.32, + "theoretical_loss": 4.004349278628525, + "tokens_seen": 406847488 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008853932584269663, + "loss": 2.3479, + "theoretical_loss": 4.00319981668085, + "tokens_seen": 407896064 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.015751907601952553, + "objective/train/docs_used": 242638, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.241832733154297, + "objective/train/original_loss": 4.241832733154297, + "objective/train/theoretical_loss": 4.003128100916459, + "objective/train/tokens_used": 428421600, + "objective/train/value_avg": -0.0235137939453125, + "objective/train/value_loss": 0.0007387925870716572, + "objective/train/value_max": -0.003650665283203125, + "objective/train/value_min": -0.1209716796875, + "objective/train/value_reward_corr": 0.344073749092222, + "objective/train/value_std": 0.01508331298828125, + "objective/train/weight_avg": 1.0015789270401, + "objective/train/weighted_lm_loss": 4.248554706573486, + "objective/train/weights_max": 1.0120972394943237, + "objective/train/weights_min": 0.9749081134796143, + "theoretical_loss": 4.003128100916459, + "tokens_seen": 407961600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008850722311396469, + "loss": 2.3379, + "theoretical_loss": 4.002054130815253, + "tokens_seen": 408944640 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": -0.020306263118982315, + "objective/train/docs_used": 244086, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.979942321777344, + "objective/train/original_loss": 4.97994327545166, + "objective/train/theoretical_loss": 4.0013399848903175, + "objective/train/tokens_used": 430060000, + "objective/train/value_avg": -0.0289306640625, + "objective/train/value_loss": 0.013821222819387913, + "objective/train/value_max": -0.0034427642822265625, + "objective/train/value_min": -0.33251953125, + "objective/train/value_reward_corr": 0.7942209727156933, + "objective/train/value_std": 0.041473388671875, + "objective/train/weight_avg": 0.998037576675415, + "objective/train/weighted_lm_loss": 4.97548246383667, + "objective/train/weights_max": 1.015438199043274, + "objective/train/weights_min": 0.9063540697097778, + "theoretical_loss": 4.0013399848903175, + "tokens_seen": 409600000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008847512038523274, + "loss": 2.3665, + "theoretical_loss": 4.000912198992316, + "tokens_seen": 409993216 + }, + { + "epoch": 0.12, + "learning_rate": 0.000884430176565008, + "loss": 2.2981, + "theoretical_loss": 3.9997739993572035, + "tokens_seen": 411041792 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.015990322455763817, + "objective/train/docs_used": 244630, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.007863998413086, + "objective/train/original_loss": 5.007863521575928, + "objective/train/theoretical_loss": 3.999561000660818, + "objective/train/tokens_used": 431698400, + "objective/train/value_avg": -0.0221710205078125, + "objective/train/value_loss": 0.0016131321899592876, + "objective/train/value_max": -0.002704620361328125, + "objective/train/value_min": -0.1427001953125, + "objective/train/value_reward_corr": 0.03576421471667393, + "objective/train/value_std": 0.013519287109375, + "objective/train/weight_avg": 1.0016069412231445, + "objective/train/weighted_lm_loss": 5.0160746574401855, + "objective/train/weights_max": 1.0142872333526611, + "objective/train/weights_min": 0.9101433753967285, + "theoretical_loss": 3.999561000660818, + "tokens_seen": 411238400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008841091492776886, + "loss": 2.3448, + "theoretical_loss": 3.9986395102376453, + "tokens_seen": 412090368 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.013736510649323463, + "objective/train/docs_used": 245807, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.461954116821289, + "objective/train/original_loss": 4.461954116821289, + "objective/train/theoretical_loss": 3.997791065488486, + "objective/train/tokens_used": 433336800, + "objective/train/value_avg": -0.018768310546875, + "objective/train/value_loss": 0.0015085627092048526, + "objective/train/value_max": -0.00307464599609375, + "objective/train/value_min": -0.115966796875, + "objective/train/value_reward_corr": 0.03814142215410524, + "objective/train/value_std": 0.01097869873046875, + "objective/train/weight_avg": 1.0013810396194458, + "objective/train/weighted_lm_loss": 4.46818733215332, + "objective/train/weights_max": 1.0115207433700562, + "objective/train/weights_min": 0.9145710468292236, + "theoretical_loss": 3.997791065488486, + "tokens_seen": 412876800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008837881219903691, + "loss": 2.3581, + "theoretical_loss": 3.99750871014196, + "tokens_seen": 413138944 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008834670947030497, + "loss": 2.3417, + "theoretical_loss": 3.9963815777570897, + "tokens_seen": 414187520 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.011399286799132824, + "objective/train/docs_used": 246540, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.7096734046936035, + "objective/train/original_loss": 4.7096734046936035, + "objective/train/theoretical_loss": 3.99603009770805, + "objective/train/tokens_used": 434975200, + "objective/train/value_avg": -0.0254669189453125, + "objective/train/value_loss": 0.005363051313906908, + "objective/train/value_max": -0.0029468536376953125, + "objective/train/value_min": -0.260498046875, + "objective/train/value_reward_corr": 0.3951285656706109, + "objective/train/value_std": 0.027099609375, + "objective/train/weight_avg": 1.0011663436889648, + "objective/train/weighted_lm_loss": 4.716434001922607, + "objective/train/weights_max": 1.0242726802825928, + "objective/train/weights_min": 0.9119225740432739, + "theoretical_loss": 3.99603009770805, + "tokens_seen": 414515200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008831460674157304, + "loss": 2.3456, + "theoretical_loss": 3.99525809194667, + "tokens_seen": 415236096 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.011637468822300434, + "objective/train/docs_used": 247824, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.2564849853515625, + "objective/train/original_loss": 4.2564849853515625, + "objective/train/theoretical_loss": 3.9942780167103145, + "objective/train/tokens_used": 436613600, + "objective/train/value_avg": -0.0242919921875, + "objective/train/value_loss": 0.0023338927421718836, + "objective/train/value_max": -0.0029582977294921875, + "objective/train/value_min": -0.119384765625, + "objective/train/value_reward_corr": 0.2513308986800464, + "objective/train/value_std": 0.0190277099609375, + "objective/train/weight_avg": 1.0011752843856812, + "objective/train/weighted_lm_loss": 4.260913848876953, + "objective/train/weights_max": 1.0101746320724487, + "objective/train/weights_min": 0.9208499193191528, + "theoretical_loss": 3.9942780167103145, + "tokens_seen": 416153600 + }, + { + "epoch": 0.13, + "learning_rate": 0.000882825040128411, + "loss": 2.3165, + "theoretical_loss": 3.9941382317491225, + "tokens_seen": 416284672 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008825040128410915, + "loss": 2.2994, + "theoretical_loss": 3.9930219763757755, + "tokens_seen": 417333248 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": -0.010591602884232998, + "objective/train/docs_used": 248559, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.994689464569092, + "objective/train/original_loss": 4.994689464569092, + "objective/train/theoretical_loss": 3.992534742924409, + "objective/train/tokens_used": 438252000, + "objective/train/value_avg": -0.0177154541015625, + "objective/train/value_loss": 0.009227224625647068, + "objective/train/value_max": -0.00274658203125, + "objective/train/value_min": -0.16357421875, + "objective/train/value_reward_corr": 0.12505764875291064, + "objective/train/value_std": 0.01023101806640625, + "objective/train/weight_avg": 0.998986005783081, + "objective/train/weighted_lm_loss": 4.990962982177734, + "objective/train/weights_max": 1.0161893367767334, + "objective/train/weights_min": 0.9062947630882263, + "theoretical_loss": 3.992534742924409, + "tokens_seen": 417792000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008821829855537721, + "loss": 2.3167, + "theoretical_loss": 3.9919093052090058, + "tokens_seen": 418381824 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.004918180406093597, + "objective/train/docs_used": 249785, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.037671089172363, + "objective/train/original_loss": 5.037670135498047, + "objective/train/theoretical_loss": 3.9908001978004064, + "objective/train/tokens_used": 439890400, + "objective/train/value_avg": -0.01788330078125, + "objective/train/value_loss": 0.006649565417319536, + "objective/train/value_max": -0.0027790069580078125, + "objective/train/value_min": -0.11798095703125, + "objective/train/value_reward_corr": 0.05932967496091675, + "objective/train/value_std": 0.0107269287109375, + "objective/train/weight_avg": 1.0005241632461548, + "objective/train/weighted_lm_loss": 5.040229320526123, + "objective/train/weights_max": 1.0098241567611694, + "objective/train/weights_min": 0.9100860357284546, + "theoretical_loss": 3.9908001978004064, + "tokens_seen": 419430400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008818619582664527, + "loss": 2.3355, + "theoretical_loss": 3.9908001978004064, + "tokens_seen": 419430400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008815409309791332, + "loss": 2.3186, + "theoretical_loss": 3.989694633868981, + "tokens_seen": 420478976 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": -0.026799293234944344, + "objective/train/docs_used": 250392, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.5576372146606445, + "objective/train/original_loss": 4.557637691497803, + "objective/train/theoretical_loss": 3.989074303792292, + "objective/train/tokens_used": 441528800, + "objective/train/value_avg": -0.04388427734375, + "objective/train/value_loss": 0.01957961730659008, + "objective/train/value_max": -0.00319671630859375, + "objective/train/value_min": -0.521484375, + "objective/train/value_reward_corr": 0.8918860528632908, + "objective/train/value_std": 0.07952880859375, + "objective/train/weight_avg": 0.9974165558815002, + "objective/train/weighted_lm_loss": 4.550442218780518, + "objective/train/weights_max": 1.0298092365264893, + "objective/train/weights_min": 0.9283219575881958, + "theoretical_loss": 3.989074303792292, + "tokens_seen": 421068800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008812199036918138, + "loss": 2.3011, + "theoretical_loss": 3.988592593299358, + "tokens_seen": 421527552 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008808988764044944, + "loss": 2.3195, + "theoretical_loss": 3.9874940561400294, + "tokens_seen": 422576128 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.009452788159251213, + "objective/train/docs_used": 251199, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.086025238037109, + "objective/train/original_loss": 4.086025714874268, + "objective/train/theoretical_loss": 3.9873569843412913, + "objective/train/tokens_used": 443167200, + "objective/train/value_avg": -0.0175628662109375, + "objective/train/value_loss": 0.002257883083075285, + "objective/train/value_max": -0.0032863616943359375, + "objective/train/value_min": -0.11859130859375, + "objective/train/value_reward_corr": 0.051931419229389264, + "objective/train/value_std": 0.0102691650390625, + "objective/train/weight_avg": 1.0009562969207764, + "objective/train/weighted_lm_loss": 4.089797019958496, + "objective/train/weights_max": 1.011245846748352, + "objective/train/weights_min": 0.9139437079429626, + "theoretical_loss": 3.9873569843412913, + "tokens_seen": 422707200 + }, + { + "epoch": 0.13, + "learning_rate": 0.000880577849117175, + "loss": 2.3052, + "theoretical_loss": 3.986399002601617, + "tokens_seen": 423624704 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.005796968005597591, + "objective/train/docs_used": 252247, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.057260990142822, + "objective/train/original_loss": 5.057260513305664, + "objective/train/theoretical_loss": 3.985648163859529, + "objective/train/tokens_used": 444805600, + "objective/train/value_avg": -0.0171356201171875, + "objective/train/value_loss": 0.004636615514755249, + "objective/train/value_max": -0.0032978057861328125, + "objective/train/value_min": -0.156005859375, + "objective/train/value_reward_corr": 0.041458358184097245, + "objective/train/value_std": 0.00914764404296875, + "objective/train/weight_avg": 1.0006023645401, + "objective/train/weighted_lm_loss": 5.0606279373168945, + "objective/train/weights_max": 1.015647292137146, + "objective/train/weights_min": 0.9186370968818665, + "theoretical_loss": 3.985648163859529, + "tokens_seen": 424345600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008802568218298555, + "loss": 2.313, + "theoretical_loss": 3.9853074130551542, + "tokens_seen": 424673280 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008799357945425361, + "loss": 2.2716, + "theoretical_loss": 3.984219268030392, + "tokens_seen": 425721856 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": -0.004399965517222881, + "objective/train/docs_used": 253004, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.648687839508057, + "objective/train/original_loss": 4.648688316345215, + "objective/train/theoretical_loss": 3.9839477677140245, + "objective/train/tokens_used": 446444000, + "objective/train/value_avg": -0.0186614990234375, + "objective/train/value_loss": 0.009716521017253399, + "objective/train/value_max": -0.00274658203125, + "objective/train/value_min": -0.127685546875, + "objective/train/value_reward_corr": 0.12740488295818672, + "objective/train/value_std": 0.0098114013671875, + "objective/train/weight_avg": 0.9996075630187988, + "objective/train/weighted_lm_loss": 4.646042823791504, + "objective/train/weights_max": 1.0127947330474854, + "objective/train/weights_min": 0.9080179333686829, + "theoretical_loss": 3.9839477677140245, + "tokens_seen": 425984000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008796147672552167, + "loss": 2.2769, + "theoretical_loss": 3.983134548214133, + "tokens_seen": 426770432 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0010341739980503917, + "objective/train/docs_used": 254260, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.746923446655273, + "objective/train/original_loss": 4.746922492980957, + "objective/train/theoretical_loss": 3.982255722211016, + "objective/train/tokens_used": 448082400, + "objective/train/value_avg": -0.036102294921875, + "objective/train/value_loss": 0.01657630316913128, + "objective/train/value_max": -0.0029010772705078125, + "objective/train/value_min": -0.56591796875, + "objective/train/value_reward_corr": 0.48115124403916826, + "objective/train/value_std": 0.068603515625, + "objective/train/weight_avg": 1.0001846551895142, + "objective/train/weighted_lm_loss": 4.748048782348633, + "objective/train/weights_max": 1.057641863822937, + "objective/train/weights_min": 0.9070830941200256, + "theoretical_loss": 3.982255722211016, + "tokens_seen": 427622400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008792937399678974, + "loss": 2.294, + "theoretical_loss": 3.98205323444858, + "tokens_seen": 427819008 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008789727126805779, + "loss": 2.2497, + "theoretical_loss": 3.9809753077297074, + "tokens_seen": 428867584 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.006395409815013409, + "objective/train/docs_used": 255066, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.87852144241333, + "objective/train/original_loss": 4.878521919250488, + "objective/train/theoretical_loss": 3.98057195458059, + "objective/train/tokens_used": 449720800, + "objective/train/value_avg": -0.0180816650390625, + "objective/train/value_loss": 0.005974596831947565, + "objective/train/value_max": -0.002471923828125, + "objective/train/value_min": -0.1695556640625, + "objective/train/value_reward_corr": 0.09343158177782822, + "objective/train/value_std": 0.0098114013671875, + "objective/train/weight_avg": 1.0006686449050903, + "objective/train/weighted_lm_loss": 4.88119649887085, + "objective/train/weights_max": 1.0170142650604248, + "objective/train/weights_min": 0.9077717065811157, + "theoretical_loss": 3.98057195458059, + "tokens_seen": 429260800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008786516853932585, + "loss": 2.3007, + "theoretical_loss": 3.979900749205657, + "tokens_seen": 429916160 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.006347690708935261, + "objective/train/docs_used": 256338, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.39447021484375, + "objective/train/original_loss": 4.394470691680908, + "objective/train/theoretical_loss": 3.9788963929616314, + "objective/train/tokens_used": 451359200, + "objective/train/value_avg": -0.0196075439453125, + "objective/train/value_loss": 0.0016145383706316352, + "objective/train/value_max": -0.0029926300048828125, + "objective/train/value_min": -0.170654296875, + "objective/train/value_reward_corr": 0.0436073778208598, + "objective/train/value_std": 0.0131683349609375, + "objective/train/weight_avg": 1.0006427764892578, + "objective/train/weighted_lm_loss": 4.399521827697754, + "objective/train/weights_max": 1.0166723728179932, + "objective/train/weights_min": 0.9340642690658569, + "theoretical_loss": 3.9788963929616314, + "tokens_seen": 430899200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008783306581059391, + "loss": 2.2885, + "theoretical_loss": 3.9788295401751483, + "tokens_seen": 430964736 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008780096308186196, + "loss": 2.3024, + "theoretical_loss": 3.9777616620859186, + "tokens_seen": 432013312 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": -0.001948302611708641, + "objective/train/docs_used": 257001, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.973455429077148, + "objective/train/original_loss": 4.97345495223999, + "objective/train/theoretical_loss": 3.9772289663870657, + "objective/train/tokens_used": 452997600, + "objective/train/value_avg": -0.016571044921875, + "objective/train/value_loss": 0.0065133715979754925, + "objective/train/value_max": -0.003795623779296875, + "objective/train/value_min": -0.1903076171875, + "objective/train/value_reward_corr": 0.019104439957538553, + "objective/train/value_std": 0.01004791259765625, + "objective/train/weight_avg": 0.9998372197151184, + "objective/train/weighted_lm_loss": 4.974050998687744, + "objective/train/weights_max": 1.019120216369629, + "objective/train/weights_min": 0.9414455890655518, + "theoretical_loss": 3.9772289663870657, + "tokens_seen": 432537600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008776886035313002, + "loss": 2.3138, + "theoretical_loss": 3.976697096533171, + "tokens_seen": 433061888 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008773675762439808, + "loss": 2.3417, + "theoretical_loss": 3.975635825258053, + "tokens_seen": 434110464 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": -0.024588363245129585, + "objective/train/docs_used": 257982, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5868306159973145, + "objective/train/original_loss": 3.5868308544158936, + "objective/train/theoretical_loss": 3.975569604769402, + "objective/train/tokens_used": 454636000, + "objective/train/value_avg": -0.02935791015625, + "objective/train/value_loss": 0.020662814378738403, + "objective/train/value_max": -0.00284576416015625, + "objective/train/value_min": -0.306884765625, + "objective/train/value_reward_corr": 0.7514297187443195, + "objective/train/value_std": 0.0386962890625, + "objective/train/weight_avg": 0.9976426362991333, + "objective/train/weighted_lm_loss": 3.5809988975524902, + "objective/train/weights_max": 1.0182534456253052, + "objective/train/weights_min": 0.9159219861030579, + "theoretical_loss": 3.975569604769402, + "tokens_seen": 434176000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008770465489566614, + "loss": 2.3342, + "theoretical_loss": 3.9745778301461483, + "tokens_seen": 435159040 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.009240333922207355, + "objective/train/docs_used": 258654, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.74128532409668, + "objective/train/original_loss": 4.7412848472595215, + "objective/train/theoretical_loss": 3.9739182388865606, + "objective/train/tokens_used": 456274400, + "objective/train/value_avg": -0.0159759521484375, + "objective/train/value_loss": 0.002819393528625369, + "objective/train/value_max": -0.0026531219482421875, + "objective/train/value_min": -0.10931396484375, + "objective/train/value_reward_corr": 0.10028625176328561, + "objective/train/value_std": 0.0105438232421875, + "objective/train/weight_avg": 1.0009379386901855, + "objective/train/weighted_lm_loss": 4.745723724365234, + "objective/train/weights_max": 1.0107874870300293, + "objective/train/weights_min": 0.9355725049972534, + "theoretical_loss": 3.9739182388865606, + "tokens_seen": 435814400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008767255216693419, + "loss": 2.3403, + "theoretical_loss": 3.9735230932259893, + "tokens_seen": 436207616 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008764044943820225, + "loss": 2.3366, + "theoretical_loss": 3.9724715966675896, + "tokens_seen": 437256192 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.01133103296160698, + "objective/train/docs_used": 259916, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.280308723449707, + "objective/train/original_loss": 4.280308723449707, + "objective/train/theoretical_loss": 3.9722748003679818, + "objective/train/tokens_used": 457912800, + "objective/train/value_avg": -0.0245819091796875, + "objective/train/value_loss": 0.004381013102829456, + "objective/train/value_max": -0.0029125213623046875, + "objective/train/value_min": -0.220458984375, + "objective/train/value_reward_corr": 0.2416261286478263, + "objective/train/value_std": 0.027099609375, + "objective/train/weight_avg": 1.0011546611785889, + "objective/train/weighted_lm_loss": 4.285236835479736, + "objective/train/weights_max": 1.0219008922576904, + "objective/train/weights_min": 0.9140313267707825, + "theoretical_loss": 3.9722748003679818, + "tokens_seen": 437452800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008760834670947031, + "loss": 2.3563, + "theoretical_loss": 3.9714233227809936, + "tokens_seen": 438304768 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.011445336975157261, + "objective/train/docs_used": 260524, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.47276496887207, + "objective/train/original_loss": 4.472764492034912, + "objective/train/theoretical_loss": 3.9706392216810085, + "objective/train/tokens_used": 459551200, + "objective/train/value_avg": -0.032073974609375, + "objective/train/value_loss": 0.009282147511839867, + "objective/train/value_max": -0.0037212371826171875, + "objective/train/value_min": -0.2095947265625, + "objective/train/value_reward_corr": 0.04228870369821873, + "objective/train/value_std": 0.0266265869140625, + "objective/train/weight_avg": 1.001189947128296, + "objective/train/weighted_lm_loss": 4.477327823638916, + "objective/train/weights_max": 1.0210902690887451, + "objective/train/weights_min": 0.907429039478302, + "theoretical_loss": 3.9706392216810085, + "tokens_seen": 439091200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008757624398073836, + "loss": 2.3183, + "theoretical_loss": 3.970378254014844, + "tokens_seen": 439353344 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008754414125200642, + "loss": 2.3992, + "theoretical_loss": 3.96933637295497, + "tokens_seen": 440401920 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.006945675238966942, + "objective/train/docs_used": 261739, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.56090784072876, + "objective/train/original_loss": 4.560907363891602, + "objective/train/theoretical_loss": 3.969011436117536, + "objective/train/tokens_used": 461189600, + "objective/train/value_avg": -0.0177459716796875, + "objective/train/value_loss": 0.0020747194066643715, + "objective/train/value_max": -0.0028667449951171875, + "objective/train/value_min": -0.12115478515625, + "objective/train/value_reward_corr": 0.11365820331013642, + "objective/train/value_std": 0.01013946533203125, + "objective/train/weight_avg": 1.0007047653198242, + "objective/train/weighted_lm_loss": 4.563508987426758, + "objective/train/weights_max": 1.0120936632156372, + "objective/train/weights_min": 0.925132691860199, + "theoretical_loss": 3.969011436117536, + "tokens_seen": 440729600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008751203852327449, + "loss": 2.3358, + "theoretical_loss": 3.96829766232299, + "tokens_seen": 441450496 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.014029199257493019, + "objective/train/docs_used": 262441, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.443390369415283, + "objective/train/original_loss": 4.443390369415283, + "objective/train/theoretical_loss": 3.9673913777809253, + "objective/train/tokens_used": 462828000, + "objective/train/value_avg": -0.018829345703125, + "objective/train/value_loss": 0.00042568918433971703, + "objective/train/value_max": -0.0029239654541015625, + "objective/train/value_min": -0.1890869140625, + "objective/train/value_reward_corr": 0.0530080717429319, + "objective/train/value_std": 0.01055908203125, + "objective/train/weight_avg": 1.001405119895935, + "objective/train/weighted_lm_loss": 4.449625492095947, + "objective/train/weights_max": 1.0190242528915405, + "objective/train/weights_min": 0.9915249347686768, + "theoretical_loss": 3.9673913777809253, + "tokens_seen": 442368000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008747993579454255, + "loss": 2.3445, + "theoretical_loss": 3.9672621049749335, + "tokens_seen": 442499072 + }, + { + "epoch": 0.13, + "learning_rate": 0.000874478330658106, + "loss": 2.3125, + "theoretical_loss": 3.96622968389988, + "tokens_seen": 443547648 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0031080881599336863, + "objective/train/docs_used": 263839, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.065175533294678, + "objective/train/original_loss": 4.0651750564575195, + "objective/train/theoretical_loss": 3.9657789815731688, + "objective/train/tokens_used": 464466400, + "objective/train/value_avg": -0.0294036865234375, + "objective/train/value_loss": 0.009645309299230576, + "objective/train/value_max": -0.0032978057861328125, + "objective/train/value_min": -0.349609375, + "objective/train/value_reward_corr": 0.24882293273837047, + "objective/train/value_std": 0.040130615234375, + "objective/train/weight_avg": 1.0003581047058105, + "objective/train/weighted_lm_loss": 4.067264080047607, + "objective/train/weights_max": 1.0334569215774536, + "objective/train/weights_min": 0.910926878452301, + "theoretical_loss": 3.9657789815731688, + "tokens_seen": 444006400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008741573033707866, + "loss": 2.3231, + "theoretical_loss": 3.9652003822186166, + "tokens_seen": 444596224 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.009974481537938118, + "objective/train/docs_used": 264540, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.815402030944824, + "objective/train/original_loss": 4.815402030944824, + "objective/train/theoretical_loss": 3.9641741831823065, + "objective/train/tokens_used": 466104800, + "objective/train/value_avg": -0.0157623291015625, + "objective/train/value_loss": 0.0030697828624397516, + "objective/train/value_max": -0.0030155181884765625, + "objective/train/value_min": -0.15869140625, + "objective/train/value_reward_corr": 0.11739135747033566, + "objective/train/value_std": 0.00995635986328125, + "objective/train/weight_avg": 1.0010123252868652, + "objective/train/weighted_lm_loss": 4.819911956787109, + "objective/train/weights_max": 1.0159014463424683, + "objective/train/weights_min": 0.9073690176010132, + "theoretical_loss": 3.9641741831823065, + "tokens_seen": 445644800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008738362760834672, + "loss": 2.3185, + "theoretical_loss": 3.9641741831823065, + "tokens_seen": 445644800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008735152487961477, + "loss": 2.2958, + "theoretical_loss": 3.9631510701711816, + "tokens_seen": 446693376 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.0003989516699220985, + "objective/train/docs_used": 265849, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.936800479888916, + "objective/train/original_loss": 4.936800003051758, + "objective/train/theoretical_loss": 3.962576919070089, + "objective/train/tokens_used": 467743200, + "objective/train/value_avg": -0.020782470703125, + "objective/train/value_loss": 0.0060688587836921215, + "objective/train/value_max": -0.003780364990234375, + "objective/train/value_min": -0.1417236328125, + "objective/train/value_reward_corr": 0.10419627311030377, + "objective/train/value_std": 0.0132293701171875, + "objective/train/weight_avg": 0.9999899864196777, + "objective/train/weighted_lm_loss": 4.937167167663574, + "objective/train/weights_max": 1.0142031908035278, + "objective/train/weights_min": 0.919179379940033, + "theoretical_loss": 3.962576919070089, + "tokens_seen": 447283200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008731942215088283, + "loss": 2.2957, + "theoretical_loss": 3.9621310266932457, + "tokens_seen": 447741952 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008728731942215088, + "loss": 2.2905, + "theoretical_loss": 3.9611140363829977, + "tokens_seen": 448790528 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.008106669411063194, + "objective/train/docs_used": 266141, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.489448547363281, + "objective/train/original_loss": 4.489448547363281, + "objective/train/theoretical_loss": 3.960987126459872, + "objective/train/tokens_used": 469381600, + "objective/train/value_avg": -0.0285186767578125, + "objective/train/value_loss": 0.0062738507986068726, + "objective/train/value_max": -0.003337860107421875, + "objective/train/value_min": -0.28271484375, + "objective/train/value_reward_corr": 0.41128752771365545, + "objective/train/value_std": 0.0418701171875, + "objective/train/weight_avg": 1.000841498374939, + "objective/train/weighted_lm_loss": 4.49171781539917, + "objective/train/weights_max": 1.0219436883926392, + "objective/train/weights_min": 0.938148021697998, + "theoretical_loss": 3.960987126459872, + "tokens_seen": 448921600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008725521669341894, + "loss": 2.3064, + "theoretical_loss": 3.9601000830001665, + "tokens_seen": 449839104 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.0009546034270897508, + "objective/train/docs_used": 266764, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.859272003173828, + "objective/train/original_loss": 4.85927152633667, + "objective/train/theoretical_loss": 3.959404743324752, + "objective/train/tokens_used": 471020000, + "objective/train/value_avg": -0.033416748046875, + "objective/train/value_loss": 0.013998904265463352, + "objective/train/value_max": -0.0032482147216796875, + "objective/train/value_min": -0.29150390625, + "objective/train/value_reward_corr": 0.3276197544533572, + "objective/train/value_std": 0.040374755859375, + "objective/train/weight_avg": 0.9999730587005615, + "objective/train/weighted_lm_loss": 4.860424995422363, + "objective/train/weights_max": 1.0235795974731445, + "objective/train/weights_min": 0.9063645005226135, + "theoretical_loss": 3.959404743324752, + "tokens_seen": 450560000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008722311396468699, + "loss": 2.29, + "theoretical_loss": 3.9590891504284635, + "tokens_seen": 450887680 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008719101123595505, + "loss": 2.3054, + "theoretical_loss": 3.9580812226743523, + "tokens_seen": 451936256 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.01180922333151102, + "objective/train/docs_used": 268192, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.475308418273926, + "objective/train/original_loss": 4.475307941436768, + "objective/train/theoretical_loss": 3.9578297083759195, + "objective/train/tokens_used": 472658400, + "objective/train/value_avg": -0.0252227783203125, + "objective/train/value_loss": 0.007487733382731676, + "objective/train/value_max": -0.0033245086669921875, + "objective/train/value_min": -0.2467041015625, + "objective/train/value_reward_corr": 0.44534076625257346, + "objective/train/value_std": 0.0278778076171875, + "objective/train/weight_avg": 0.9988561272621155, + "objective/train/weighted_lm_loss": 4.471895217895508, + "objective/train/weights_max": 1.019981026649475, + "objective/train/weights_min": 0.9123241901397705, + "theoretical_loss": 3.9578297083759195, + "tokens_seen": 452198400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008715890850722311, + "loss": 2.3266, + "theoretical_loss": 3.95707628386583, + "tokens_seen": 452984832 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.014543934725224972, + "objective/train/docs_used": 268717, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.592695236206055, + "objective/train/original_loss": 4.592694282531738, + "objective/train/theoretical_loss": 3.9562619610512435, + "objective/train/tokens_used": 474296800, + "objective/train/value_avg": -0.0185089111328125, + "objective/train/value_loss": 0.0009651980944909155, + "objective/train/value_max": -0.00319671630859375, + "objective/train/value_min": -0.1815185546875, + "objective/train/value_reward_corr": 0.025745046403133232, + "objective/train/value_std": 0.01006317138671875, + "objective/train/weight_avg": 1.0014591217041016, + "objective/train/weighted_lm_loss": 4.599299907684326, + "objective/train/weights_max": 1.0182462930679321, + "objective/train/weights_min": 0.954782247543335, + "theoretical_loss": 3.9562619610512435, + "tokens_seen": 453836800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008712680577849116, + "loss": 2.3039, + "theoretical_loss": 3.9560743182512255, + "tokens_seen": 454033408 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008709470304975923, + "loss": 2.2701, + "theoretical_loss": 3.9550753101980103, + "tokens_seen": 455081984 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.005699384957551956, + "objective/train/docs_used": 269967, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.564932823181152, + "objective/train/original_loss": 4.564932823181152, + "objective/train/theoretical_loss": 3.954701441504068, + "objective/train/tokens_used": 475935200, + "objective/train/value_avg": -0.0301971435546875, + "objective/train/value_loss": 0.008857117034494877, + "objective/train/value_max": -0.0026531219482421875, + "objective/train/value_min": -0.324462890625, + "objective/train/value_reward_corr": 0.3763025610599045, + "objective/train/value_std": 0.04541015625, + "objective/train/weight_avg": 1.0006133317947388, + "objective/train/weighted_lm_loss": 4.567113399505615, + "objective/train/weights_max": 1.0277156829833984, + "objective/train/weights_min": 0.9093440771102905, + "theoretical_loss": 3.954701441504068, + "tokens_seen": 455475200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008706260032102729, + "loss": 2.2889, + "theoretical_loss": 3.954079244191628, + "tokens_seen": 456130560 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.016405003145337105, + "objective/train/docs_used": 270622, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.115323066711426, + "objective/train/original_loss": 4.115323066711426, + "objective/train/theoretical_loss": 3.9531480905922205, + "objective/train/tokens_used": 477573600, + "objective/train/value_avg": -0.02764892578125, + "objective/train/value_loss": 0.0027624163776636124, + "objective/train/value_max": -0.0026721954345703125, + "objective/train/value_min": -0.350830078125, + "objective/train/value_reward_corr": 0.30793866299179146, + "objective/train/value_std": 0.041351318359375, + "objective/train/weight_avg": 1.0016542673110962, + "objective/train/weighted_lm_loss": 4.1214118003845215, + "objective/train/weights_max": 1.0295641422271729, + "objective/train/weights_min": 0.9248126149177551, + "theoretical_loss": 3.9531480905922205, + "tokens_seen": 457113600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008703049759229535, + "loss": 2.2682, + "theoretical_loss": 3.953086104834334, + "tokens_seen": 457179136 + }, + { + "epoch": 0.14, + "learning_rate": 0.000869983948635634, + "loss": 2.2958, + "theoretical_loss": 3.9520958768440484, + "tokens_seen": 458227712 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0062491740100085735, + "objective/train/docs_used": 271952, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.652008056640625, + "objective/train/original_loss": 4.652008533477783, + "objective/train/theoretical_loss": 3.951601849867233, + "objective/train/tokens_used": 479212000, + "objective/train/value_avg": -0.0250244140625, + "objective/train/value_loss": 0.003489638911560178, + "objective/train/value_max": -0.0029239654541015625, + "objective/train/value_min": -0.217041015625, + "objective/train/value_reward_corr": 0.3192858753607967, + "objective/train/value_std": 0.0270233154296875, + "objective/train/weight_avg": 1.00064218044281, + "objective/train/weighted_lm_loss": 4.656381607055664, + "objective/train/weights_max": 1.0194610357284546, + "objective/train/weights_min": 0.9085555076599121, + "theoretical_loss": 3.951601849867233, + "tokens_seen": 458752000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008696629213483146, + "loss": 2.2367, + "theoretical_loss": 3.951108545053229, + "tokens_seen": 459276288 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008693418940609952, + "loss": 2.2671, + "theoretical_loss": 3.9501240944077494, + "tokens_seen": 460324864 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.00039071895298548043, + "objective/train/docs_used": 272724, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.875197410583496, + "objective/train/original_loss": 3.875197172164917, + "objective/train/theoretical_loss": 3.9500626615637597, + "objective/train/tokens_used": 480850400, + "objective/train/value_avg": -0.0195159912109375, + "objective/train/value_loss": 0.011394979432225227, + "objective/train/value_max": -0.00266265869140625, + "objective/train/value_min": -0.1300048828125, + "objective/train/value_reward_corr": 0.15700319939251442, + "objective/train/value_std": 0.01337432861328125, + "objective/train/weight_avg": 1.0000163316726685, + "objective/train/weighted_lm_loss": 3.873086929321289, + "objective/train/weights_max": 1.0127193927764893, + "objective/train/weights_min": 0.9062994718551636, + "theoretical_loss": 3.9500626615637597, + "tokens_seen": 460390400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008690208667736758, + "loss": 2.2682, + "theoretical_loss": 3.949142509965799, + "tokens_seen": 461373440 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.014879120513796806, + "objective/train/docs_used": 273842, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.748263359069824, + "objective/train/original_loss": 4.748263359069824, + "objective/train/theoretical_loss": 3.948530468589195, + "objective/train/tokens_used": 482488800, + "objective/train/value_avg": -0.0177764892578125, + "objective/train/value_loss": 0.000419639894971624, + "objective/train/value_max": -0.0032978057861328125, + "objective/train/value_min": -0.123779296875, + "objective/train/value_reward_corr": 0.04566470747691459, + "objective/train/value_std": 0.00928497314453125, + "objective/train/weight_avg": 1.0014899969100952, + "objective/train/weighted_lm_loss": 4.755583763122559, + "objective/train/weights_max": 1.012385606765747, + "objective/train/weights_min": 0.9815261960029602, + "theoretical_loss": 3.948530468589195, + "tokens_seen": 462028800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008686998394863563, + "loss": 2.2522, + "theoretical_loss": 3.9481637768967883, + "tokens_seen": 462422016 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008683788121990369, + "loss": 2.2305, + "theoretical_loss": 3.9471878804802736, + "tokens_seen": 463470592 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.0060041723772883415, + "objective/train/docs_used": 274430, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8331215381622314, + "objective/train/original_loss": 3.8331212997436523, + "objective/train/theoretical_loss": 3.94700521451349, + "objective/train/tokens_used": 484127200, + "objective/train/value_avg": -0.0283966064453125, + "objective/train/value_loss": 0.013856176286935806, + "objective/train/value_max": -0.002452850341796875, + "objective/train/value_min": -0.379638671875, + "objective/train/value_reward_corr": 0.07155252830379831, + "objective/train/value_std": 0.0372314453125, + "objective/train/weight_avg": 0.9994674324989319, + "objective/train/weighted_lm_loss": 3.8299062252044678, + "objective/train/weights_max": 1.0338950157165527, + "objective/train/weights_min": 0.9078178405761719, + "theoretical_loss": 3.94700521451349, + "tokens_seen": 463667200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008680577849117175, + "loss": 2.1907, + "theoretical_loss": 3.9462148061048907, + "tokens_seen": 464519168 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0074944449588656425, + "objective/train/docs_used": 275809, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.528003692626953, + "objective/train/original_loss": 4.528003692626953, + "objective/train/theoretical_loss": 3.9454868435591504, + "objective/train/tokens_used": 485765600, + "objective/train/value_avg": -0.027679443359375, + "objective/train/value_loss": 0.0019212161423638463, + "objective/train/value_max": -0.002452850341796875, + "objective/train/value_min": -0.365234375, + "objective/train/value_reward_corr": 0.4964637329944626, + "objective/train/value_std": 0.042236328125, + "objective/train/weight_avg": 1.0007590055465698, + "objective/train/weighted_lm_loss": 4.535093307495117, + "objective/train/weights_max": 1.02508544921875, + "objective/train/weights_min": 0.973685622215271, + "theoretical_loss": 3.9454868435591504, + "tokens_seen": 465305600 + }, + { + "epoch": 0.14, + "learning_rate": 0.000867736757624398, + "loss": 2.244, + "theoretical_loss": 3.945244539267303, + "tokens_seen": 465567744 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008674157303370786, + "loss": 2.2372, + "theoretical_loss": 3.9442770655711614, + "tokens_seen": 466616320 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.013008149340748787, + "objective/train/docs_used": 276320, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.6987690925598145, + "objective/train/original_loss": 4.698769569396973, + "objective/train/theoretical_loss": 3.9439753005914273, + "objective/train/tokens_used": 487404000, + "objective/train/value_avg": -0.0270538330078125, + "objective/train/value_loss": 0.01815665327012539, + "objective/train/value_max": -0.0029697418212890625, + "objective/train/value_min": -0.1678466796875, + "objective/train/value_reward_corr": 0.30510983263529073, + "objective/train/value_std": 0.0214385986328125, + "objective/train/weight_avg": 0.9987879395484924, + "objective/train/weighted_lm_loss": 4.691603660583496, + "objective/train/weights_max": 1.0133936405181885, + "objective/train/weights_min": 0.9066823720932007, + "theoretical_loss": 3.9439753005914273, + "tokens_seen": 466944000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008670947030497593, + "loss": 2.228, + "theoretical_loss": 3.9433123707260775, + "tokens_seen": 467664896 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.006873020902276039, + "objective/train/docs_used": 277290, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.895576477050781, + "objective/train/original_loss": 4.895576000213623, + "objective/train/theoretical_loss": 3.9424705311086856, + "objective/train/tokens_used": 489042400, + "objective/train/value_avg": -0.0198211669921875, + "objective/train/value_loss": 0.005097350105643272, + "objective/train/value_max": -0.0030994415283203125, + "objective/train/value_min": -0.109130859375, + "objective/train/value_reward_corr": 0.19017303735316177, + "objective/train/value_std": 0.01161956787109375, + "objective/train/weight_avg": 1.0007121562957764, + "objective/train/weighted_lm_loss": 4.899545192718506, + "objective/train/weights_max": 1.0106112957000732, + "objective/train/weights_min": 0.9060896635055542, + "theoretical_loss": 3.9424705311086856, + "tokens_seen": 468582400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008667736757624399, + "loss": 2.2335, + "theoretical_loss": 3.9423504405466074, + "tokens_seen": 468713472 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008664526484751204, + "loss": 2.2642, + "theoretical_loss": 3.9413912609512485, + "tokens_seen": 469762048 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.014749297872185707, + "objective/train/docs_used": 277851, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.715506076812744, + "objective/train/original_loss": 4.715505123138428, + "objective/train/theoretical_loss": 3.940972481232949, + "objective/train/tokens_used": 490680800, + "objective/train/value_avg": -0.022979736328125, + "objective/train/value_loss": 0.00215742620639503, + "objective/train/value_max": -0.0025310516357421875, + "objective/train/value_min": -0.276611328125, + "objective/train/value_reward_corr": 0.2129213374166885, + "objective/train/value_std": 0.0266265869140625, + "objective/train/weight_avg": 1.0014855861663818, + "objective/train/weighted_lm_loss": 4.721700668334961, + "objective/train/weights_max": 1.025691270828247, + "objective/train/weights_min": 0.9076271653175354, + "theoretical_loss": 3.940972481232949, + "tokens_seen": 470220800 + }, + { + "epoch": 0.14, + "learning_rate": 0.000866131621187801, + "loss": 2.2571, + "theoretical_loss": 3.940434817961448, + "tokens_seen": 470810624 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.00756754819303751, + "objective/train/docs_used": 279270, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.634400844573975, + "objective/train/original_loss": 4.634401798248291, + "objective/train/theoretical_loss": 3.939481097700623, + "objective/train/tokens_used": 492319200, + "objective/train/value_avg": -0.01904296875, + "objective/train/value_loss": 0.007862976752221584, + "objective/train/value_max": -0.002471923828125, + "objective/train/value_min": -0.2293701171875, + "objective/train/value_reward_corr": 0.054978130835563965, + "objective/train/value_std": 0.0168914794921875, + "objective/train/weight_avg": 0.9992818832397461, + "objective/train/weighted_lm_loss": 4.635465621948242, + "objective/train/weights_max": 1.0229477882385254, + "objective/train/weights_min": 0.9117174744606018, + "theoretical_loss": 3.939481097700623, + "tokens_seen": 471859200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008658105939004816, + "loss": 2.2256, + "theoretical_loss": 3.939481097700623, + "tokens_seen": 471859200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008654895666131621, + "loss": 2.2528, + "theoretical_loss": 3.9385300863931914, + "tokens_seen": 472907776 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.014175926335155964, + "objective/train/docs_used": 279967, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.1962738037109375, + "objective/train/original_loss": 4.1962738037109375, + "objective/train/theoretical_loss": 3.937996327853382, + "objective/train/tokens_used": 493957600, + "objective/train/value_avg": -0.0184783935546875, + "objective/train/value_loss": 0.00035451134317554533, + "objective/train/value_max": -0.0027790069580078125, + "objective/train/value_min": -0.148681640625, + "objective/train/value_reward_corr": 0.19842332505319893, + "objective/train/value_std": 0.01088714599609375, + "objective/train/weight_avg": 1.0014194250106812, + "objective/train/weighted_lm_loss": 4.202267646789551, + "objective/train/weights_max": 1.0146969556808472, + "objective/train/weights_min": 0.9931946992874146, + "theoretical_loss": 3.937996327853382, + "tokens_seen": 473497600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008651685393258427, + "loss": 2.2381, + "theoretical_loss": 3.9375817703636167, + "tokens_seen": 473956352 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008648475120385233, + "loss": 2.245, + "theoretical_loss": 3.9366361360354585, + "tokens_seen": 475004928 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0001795929274521768, + "objective/train/docs_used": 280356, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.5668559074401855, + "objective/train/original_loss": 4.5668559074401855, + "objective/train/theoretical_loss": 3.936518119629225, + "objective/train/tokens_used": 495596000, + "objective/train/value_avg": -0.018157958984375, + "objective/train/value_loss": 0.004545530769973993, + "objective/train/value_max": -0.003749847412109375, + "objective/train/value_min": -0.118408203125, + "objective/train/value_reward_corr": 0.19468962035389048, + "objective/train/value_std": 0.01006317138671875, + "objective/train/weight_avg": 1.0000402927398682, + "objective/train/weighted_lm_loss": 4.568256378173828, + "objective/train/weights_max": 1.0116528272628784, + "objective/train/weights_min": 0.9061692953109741, + "theoretical_loss": 3.936518119629225, + "tokens_seen": 475136000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008645264847512039, + "loss": 2.2132, + "theoretical_loss": 3.9356931699304427, + "tokens_seen": 476053504 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.006488191895186901, + "objective/train/docs_used": 281398, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.063389778137207, + "objective/train/original_loss": 4.063389778137207, + "objective/train/theoretical_loss": 3.9350464215537, + "objective/train/tokens_used": 497234400, + "objective/train/value_avg": -0.018768310546875, + "objective/train/value_loss": 0.0029144485015422106, + "objective/train/value_max": -0.00269317626953125, + "objective/train/value_min": -0.1644287109375, + "objective/train/value_reward_corr": 0.14866475291982484, + "objective/train/value_std": 0.01134490966796875, + "objective/train/weight_avg": 1.000663161277771, + "objective/train/weighted_lm_loss": 4.066357612609863, + "objective/train/weights_max": 1.0106462240219116, + "objective/train/weights_min": 0.9198702573776245, + "theoretical_loss": 3.9350464215537, + "tokens_seen": 476774400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008642054574638844, + "loss": 2.2038, + "theoretical_loss": 3.9347528586675304, + "tokens_seen": 477102080 + }, + { + "epoch": 0.14, + "learning_rate": 0.000863884430176565, + "loss": 2.2398, + "theoretical_loss": 3.9338151889620114, + "tokens_seen": 478150656 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.011796596460044384, + "objective/train/docs_used": 281924, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.7233710289001465, + "objective/train/original_loss": 4.723370552062988, + "objective/train/theoretical_loss": 3.933581182731271, + "objective/train/tokens_used": 498872800, + "objective/train/value_avg": -0.020721435546875, + "objective/train/value_loss": 0.0018828677712008357, + "objective/train/value_max": -0.00274658203125, + "objective/train/value_min": -0.11578369140625, + "objective/train/value_reward_corr": 0.1902800712969045, + "objective/train/value_std": 0.012664794921875, + "objective/train/weight_avg": 1.0011889934539795, + "objective/train/weighted_lm_loss": 4.728972911834717, + "objective/train/weights_max": 1.011029601097107, + "objective/train/weights_min": 0.9424435496330261, + "theoretical_loss": 3.933581182731271, + "tokens_seen": 478412800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008635634028892456, + "loss": 2.236, + "theoretical_loss": 3.9328801476245987, + "tokens_seen": 479199232 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.008281982503831387, + "objective/train/docs_used": 283309, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.448596477508545, + "objective/train/original_loss": 4.448596954345703, + "objective/train/theoretical_loss": 3.9321223528368607, + "objective/train/tokens_used": 500511200, + "objective/train/value_avg": -0.030181884765625, + "objective/train/value_loss": 0.004762884229421616, + "objective/train/value_max": -0.0029811859130859375, + "objective/train/value_min": -0.1619873046875, + "objective/train/value_reward_corr": 0.4229943116183801, + "objective/train/value_std": 0.0240478515625, + "objective/train/weight_avg": 1.0008516311645508, + "objective/train/weighted_lm_loss": 4.452341556549072, + "objective/train/weights_max": 1.0143009424209595, + "objective/train/weights_min": 0.9113909602165222, + "theoretical_loss": 3.9321223528368607, + "tokens_seen": 480051200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008632423756019261, + "loss": 2.2489, + "theoretical_loss": 3.9319477215605323, + "tokens_seen": 480247808 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008629213483146068, + "loss": 2.2721, + "theoretical_loss": 3.9310178977687045, + "tokens_seen": 481296384 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.00625236053019762, + "objective/train/docs_used": 283520, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.239785671234131, + "objective/train/original_loss": 4.239785194396973, + "objective/train/theoretical_loss": 3.930669882107529, + "objective/train/tokens_used": 502149600, + "objective/train/value_avg": -0.0218963623046875, + "objective/train/value_loss": 0.008341706357896328, + "objective/train/value_max": -0.0026531219482421875, + "objective/train/value_min": -0.1007080078125, + "objective/train/value_reward_corr": 0.20356606660012755, + "objective/train/value_std": 0.0147552490234375, + "objective/train/weight_avg": 1.0006659030914307, + "objective/train/weighted_lm_loss": 4.243607044219971, + "objective/train/weights_max": 1.0100175142288208, + "objective/train/weights_min": 0.9075663685798645, + "theoretical_loss": 3.930669882107529, + "tokens_seen": 481689600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008626003210272874, + "loss": 2.2481, + "theoretical_loss": 3.930090663340782, + "tokens_seen": 482344960 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": -0.007218700833618641, + "objective/train/docs_used": 284734, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.101147174835205, + "objective/train/original_loss": 5.101147174835205, + "objective/train/theoretical_loss": 3.9292237213343135, + "objective/train/tokens_used": 503788000, + "objective/train/value_avg": -0.0380859375, + "objective/train/value_loss": 0.012825297191739082, + "objective/train/value_max": -0.0027790069580078125, + "objective/train/value_min": -0.2509765625, + "objective/train/value_reward_corr": 0.42360738889402616, + "objective/train/value_std": 0.043121337890625, + "objective/train/weight_avg": 0.9993410706520081, + "objective/train/weighted_lm_loss": 5.097322940826416, + "objective/train/weights_max": 1.024773359298706, + "objective/train/weights_min": 0.9074034094810486, + "theoretical_loss": 3.9292237213343135, + "tokens_seen": 483328000 + }, + { + "epoch": 0.15, + "learning_rate": 0.000862279293739968, + "loss": 2.2389, + "theoretical_loss": 3.9291660054603454, + "tokens_seen": 483393536 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008619582664526485, + "loss": 2.2653, + "theoretical_loss": 3.9282439114020375, + "tokens_seen": 484442112 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.017200347036123276, + "objective/train/docs_used": 285372, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.78768253326416, + "objective/train/original_loss": 3.7876827716827393, + "objective/train/theoretical_loss": 3.927783821854201, + "objective/train/tokens_used": 505426400, + "objective/train/value_avg": -0.02947998046875, + "objective/train/value_loss": 0.005190836265683174, + "objective/train/value_max": -0.0030994415283203125, + "objective/train/value_min": -0.41796875, + "objective/train/value_reward_corr": 0.3880213546223726, + "objective/train/value_std": 0.038543701171875, + "objective/train/weight_avg": 1.0017457008361816, + "objective/train/weighted_lm_loss": 3.794609308242798, + "objective/train/weights_max": 1.0419528484344482, + "objective/train/weights_min": 0.9231107831001282, + "theoretical_loss": 3.927783821854201, + "tokens_seen": 484966400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008616372391653291, + "loss": 2.2463, + "theoretical_loss": 3.927324368530723, + "tokens_seen": 485490688 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008613162118780097, + "loss": 2.2606, + "theoretical_loss": 3.926407364300649, + "tokens_seen": 486539264 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": -0.0015587761299684644, + "objective/train/docs_used": 285999, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.734065055847168, + "objective/train/original_loss": 4.734065532684326, + "objective/train/theoretical_loss": 3.9263501355422585, + "objective/train/tokens_used": 507064800, + "objective/train/value_avg": -0.031341552734375, + "objective/train/value_loss": 0.006803025957196951, + "objective/train/value_max": -0.0022335052490234375, + "objective/train/value_min": -0.295166015625, + "objective/train/value_reward_corr": 0.5655909345178711, + "objective/train/value_std": 0.0428466796875, + "objective/train/weight_avg": 0.9998776912689209, + "objective/train/weighted_lm_loss": 4.733001232147217, + "objective/train/weights_max": 1.0246855020523071, + "objective/train/weights_min": 0.9345104098320007, + "theoretical_loss": 3.9263501355422585, + "tokens_seen": 486604800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008609951845906903, + "loss": 2.2443, + "theoretical_loss": 3.9254928862546303, + "tokens_seen": 487587840 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.010811884887516499, + "objective/train/docs_used": 287428, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.45264196395874, + "objective/train/original_loss": 4.452641010284424, + "objective/train/theoretical_loss": 3.9249226148038927, + "objective/train/tokens_used": 508703200, + "objective/train/value_avg": -0.0222320556640625, + "objective/train/value_loss": 0.0036367704160511494, + "objective/train/value_max": -0.0032596588134765625, + "objective/train/value_min": -0.144775390625, + "objective/train/value_reward_corr": 0.10301160176483132, + "objective/train/value_std": 0.01318359375, + "objective/train/weight_avg": 1.0010989904403687, + "objective/train/weighted_lm_loss": 4.458367824554443, + "objective/train/weights_max": 1.014472484588623, + "objective/train/weights_min": 0.9153738021850586, + "theoretical_loss": 3.9249226148038927, + "tokens_seen": 488243200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008606741573033708, + "loss": 2.2449, + "theoretical_loss": 3.9245809220232295, + "tokens_seen": 488636416 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008603531300160514, + "loss": 2.2431, + "theoretical_loss": 3.923671459323953, + "tokens_seen": 489684992 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.013551853597164154, + "objective/train/docs_used": 288116, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.470155715942383, + "objective/train/original_loss": 4.470154762268066, + "objective/train/theoretical_loss": 3.9235012125672473, + "objective/train/tokens_used": 510341600, + "objective/train/value_avg": -0.034454345703125, + "objective/train/value_loss": 0.0049345530569553375, + "objective/train/value_max": -0.0035648345947265625, + "objective/train/value_min": -0.31640625, + "objective/train/value_reward_corr": 0.38407235385315247, + "objective/train/value_std": 0.0469970703125, + "objective/train/weight_avg": 1.0013794898986816, + "objective/train/weighted_lm_loss": 4.4772443771362305, + "objective/train/weights_max": 1.0271648168563843, + "objective/train/weights_min": 0.9075713753700256, + "theoretical_loss": 3.9235012125672473, + "tokens_seen": 489881600 + }, + { + "epoch": 0.15, + "learning_rate": 0.000860032102728732, + "loss": 2.2545, + "theoretical_loss": 3.9227644859604562, + "tokens_seen": 490733568 + }, + { + "debugging/Self-BLEU-5": 0.43646517028931053, + "debugging/distinct-1-grams": 0.8073822333747359, + "debugging/distinct-2-grams": 0.976779594165329, + "debugging/entropy-1-grams": 5.794474064713653, + "debugging/entropy-2-grams": 6.45939179102421, + "debugging/length": 536.5, + "debugging/num_segments": 8, + "debugging/raw_token_scores_avg": 0.017666742205619812, + "debugging/raw_token_scores_std": 0.06696465611457825, + "epoch": 0.15, + "objective/train/advantage_avg": 0.008383609354496002, + "objective/train/docs_used": 289197, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 5.153895854949951, + "objective/train/original_loss": 5.153896331787109, + "objective/train/theoretical_loss": 3.9220858822757396, + "objective/train/tokens_used": 511980000, + "objective/train/value_avg": -0.0260467529296875, + "objective/train/value_loss": 0.004017084836959839, + "objective/train/value_max": -0.002704620361328125, + "objective/train/value_min": -0.1539306640625, + "objective/train/value_reward_corr": 0.3466883106536252, + "objective/train/value_std": 0.0225372314453125, + "objective/train/weight_avg": 1.0008580684661865, + "objective/train/weighted_lm_loss": 5.158296585083008, + "objective/train/weights_max": 1.015379548072815, + "objective/train/weights_min": 0.9196099638938904, + "theoretical_loss": 3.9220858822757396, + "tokens_seen": 491520000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008597110754414125, + "loss": 2.2696, + "theoretical_loss": 3.9218599898217583, + "tokens_seen": 491782144 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008593900481540931, + "loss": 2.2568, + "theoretical_loss": 3.92095795888146, + "tokens_seen": 492830720 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.003687913529574871, + "objective/train/docs_used": 289918, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.211852073669434, + "objective/train/original_loss": 4.211852550506592, + "objective/train/theoretical_loss": 3.9206765778807227, + "objective/train/tokens_used": 513618400, + "objective/train/value_avg": -0.0253143310546875, + "objective/train/value_loss": 0.005165142938494682, + "objective/train/value_max": -0.00274658203125, + "objective/train/value_min": -0.198486328125, + "objective/train/value_reward_corr": 0.27198630522697165, + "objective/train/value_std": 0.0206298828125, + "objective/train/weight_avg": 1.000394344329834, + "objective/train/weighted_lm_loss": 4.214310169219971, + "objective/train/weights_max": 1.0191713571548462, + "objective/train/weights_min": 0.9207713603973389, + "theoretical_loss": 3.9206765778807227, + "tokens_seen": 493158400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008590690208667738, + "loss": 2.2306, + "theoretical_loss": 3.9200583811969785, + "tokens_seen": 493879296 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.012492693029344082, + "objective/train/docs_used": 291263, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.600378513336182, + "objective/train/original_loss": 4.600378513336182, + "objective/train/theoretical_loss": 3.9192732538342785, + "objective/train/tokens_used": 515256800, + "objective/train/value_avg": -0.0233306884765625, + "objective/train/value_loss": 0.004369958303868771, + "objective/train/value_max": -0.0023975372314453125, + "objective/train/value_min": -0.1990966796875, + "objective/train/value_reward_corr": 0.14770809911344832, + "objective/train/value_std": 0.0189666748046875, + "objective/train/weight_avg": 1.0012706518173218, + "objective/train/weighted_lm_loss": 4.605587959289551, + "objective/train/weights_max": 1.0200270414352417, + "objective/train/weights_min": 0.9101675152778625, + "theoretical_loss": 3.9192732538342785, + "tokens_seen": 494796800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008587479935794544, + "loss": 2.2608, + "theoretical_loss": 3.919161244908785, + "tokens_seen": 494927872 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008584269662921349, + "loss": 2.2279, + "theoretical_loss": 3.918266538239653, + "tokens_seen": 495976448 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": -0.014070545323193073, + "objective/train/docs_used": 291574, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.996253490447998, + "objective/train/original_loss": 3.996253490447998, + "objective/train/theoretical_loss": 3.917875865082138, + "objective/train/tokens_used": 516895200, + "objective/train/value_avg": -0.042083740234375, + "objective/train/value_loss": 0.016891904175281525, + "objective/train/value_max": -0.00307464599609375, + "objective/train/value_min": -0.3427734375, + "objective/train/value_reward_corr": 0.30522620631769865, + "objective/train/value_std": 0.053955078125, + "objective/train/weight_avg": 0.9986760020256042, + "objective/train/weighted_lm_loss": 3.998897075653076, + "objective/train/weights_max": 1.0322813987731934, + "objective/train/weights_min": 0.9088698029518127, + "theoretical_loss": 3.917875865082138, + "tokens_seen": 496435200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008581059390048154, + "loss": 2.2489, + "theoretical_loss": 3.917374249493913, + "tokens_seen": 497025024 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.011275163851678371, + "objective/train/docs_used": 293024, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.779506683349609, + "objective/train/original_loss": 4.779506206512451, + "objective/train/theoretical_loss": 3.9164843670567215, + "objective/train/tokens_used": 518533600, + "objective/train/value_avg": -0.0267333984375, + "objective/train/value_loss": 0.0027533406391739845, + "objective/train/value_max": -0.0030155181884765625, + "objective/train/value_min": -0.2420654296875, + "objective/train/value_reward_corr": 0.4548286622637415, + "objective/train/value_std": 0.036895751953125, + "objective/train/weight_avg": 1.00114107131958, + "objective/train/weighted_lm_loss": 4.784395217895508, + "objective/train/weights_max": 1.0156291723251343, + "objective/train/weights_min": 0.9246231913566589, + "theoretical_loss": 3.9164843670567215, + "tokens_seen": 498073600 + }, + { + "epoch": 0.15, + "learning_rate": 0.000857784911717496, + "loss": 2.2374, + "theoretical_loss": 3.9164843670567215, + "tokens_seen": 498073600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008574638844301765, + "loss": 2.2497, + "theoretical_loss": 3.9155968793933273, + "tokens_seen": 499122176 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.010451622307300568, + "objective/train/docs_used": 293514, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.362644672393799, + "objective/train/original_loss": 4.362644195556641, + "objective/train/theoretical_loss": 3.9150987156702994, + "objective/train/tokens_used": 520172000, + "objective/train/value_avg": -0.015472412109375, + "objective/train/value_loss": 0.0020472139585763216, + "objective/train/value_max": -0.0022792816162109375, + "objective/train/value_min": -0.09844970703125, + "objective/train/value_reward_corr": 0.026278927092401295, + "objective/train/value_std": 0.0104827880859375, + "objective/train/weight_avg": 1.0010552406311035, + "objective/train/weighted_lm_loss": 4.3673906326293945, + "objective/train/weights_max": 1.0097835063934326, + "objective/train/weights_min": 0.9191088080406189, + "theoretical_loss": 3.9150987156702994, + "tokens_seen": 499712000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008571428571428571, + "loss": 2.2593, + "theoretical_loss": 3.9147117750483584, + "tokens_seen": 500170752 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008568218298555377, + "loss": 2.2456, + "theoretical_loss": 3.913829042645107, + "tokens_seen": 501219328 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.014168784953653812, + "objective/train/docs_used": 294722, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.242805004119873, + "objective/train/original_loss": 4.242804527282715, + "objective/train/theoretical_loss": 3.913718867308278, + "objective/train/tokens_used": 521810400, + "objective/train/value_avg": -0.0198822021484375, + "objective/train/value_loss": 0.0017252578400075436, + "objective/train/value_max": -0.00260162353515625, + "objective/train/value_min": -0.1275634765625, + "objective/train/value_reward_corr": 0.09308852403786598, + "objective/train/value_std": 0.0113677978515625, + "objective/train/weight_avg": 1.0014253854751587, + "objective/train/weighted_lm_loss": 4.24885892868042, + "objective/train/weights_max": 1.012100338935852, + "objective/train/weights_min": 0.9130321741104126, + "theoretical_loss": 3.913718867308278, + "tokens_seen": 501350400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008565008025682183, + "loss": 2.23, + "theoretical_loss": 3.912948670884827, + "tokens_seen": 502267904 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": -0.008386615663766861, + "objective/train/docs_used": 295112, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.616636753082275, + "objective/train/original_loss": 4.616637229919434, + "objective/train/theoretical_loss": 3.912344778822587, + "objective/train/tokens_used": 523448800, + "objective/train/value_avg": -0.0257110595703125, + "objective/train/value_loss": 0.011216583661735058, + "objective/train/value_max": -0.0028228759765625, + "objective/train/value_min": -0.138671875, + "objective/train/value_reward_corr": 0.26302281769670693, + "objective/train/value_std": 0.0172119140625, + "objective/train/weight_avg": 0.9992161393165588, + "objective/train/weighted_lm_loss": 4.6117963790893555, + "objective/train/weights_max": 1.013596534729004, + "objective/train/weights_min": 0.9076256155967712, + "theoretical_loss": 3.912344778822587, + "tokens_seen": 502988800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008561797752808988, + "loss": 2.2007, + "theoretical_loss": 3.912070648546038, + "tokens_seen": 503316480 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008558587479935794, + "loss": 2.2208, + "theoretical_loss": 3.9111949644838386, + "tokens_seen": 504365056 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.008247025310993195, + "objective/train/docs_used": 296352, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.432052135467529, + "objective/train/original_loss": 4.432052135467529, + "objective/train/theoretical_loss": 3.910976407525199, + "objective/train/tokens_used": 525087200, + "objective/train/value_avg": -0.01947021484375, + "objective/train/value_loss": 0.0011866887798532844, + "objective/train/value_max": -0.002349853515625, + "objective/train/value_min": -0.162353515625, + "objective/train/value_reward_corr": 0.4405288953109502, + "objective/train/value_std": 0.0157623291015625, + "objective/train/weight_avg": 1.0008306503295898, + "objective/train/weighted_lm_loss": 4.435558319091797, + "objective/train/weights_max": 1.0151125192642212, + "objective/train/weights_min": 0.9389023184776306, + "theoretical_loss": 3.910976407525199, + "tokens_seen": 504627200 + }, + { + "epoch": 0.15, + "learning_rate": 0.00085553772070626, + "loss": 2.2245, + "theoretical_loss": 3.910321607629225, + "tokens_seen": 505413632 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.013311585411429405, + "objective/train/docs_used": 297118, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.149994373321533, + "objective/train/original_loss": 4.149994373321533, + "objective/train/theoretical_loss": 3.9096137111817413, + "objective/train/tokens_used": 526725600, + "objective/train/value_avg": -0.01971435546875, + "objective/train/value_loss": 0.0008260476752184331, + "objective/train/value_max": -0.0025310516357421875, + "objective/train/value_min": -0.1739501953125, + "objective/train/value_reward_corr": 0.12093673157782002, + "objective/train/value_std": 0.012664794921875, + "objective/train/weight_avg": 1.0013352632522583, + "objective/train/weighted_lm_loss": 4.15568208694458, + "objective/train/weights_max": 1.0171329975128174, + "objective/train/weights_min": 0.9467586874961853, + "theoretical_loss": 3.9096137111817413, + "tokens_seen": 506265600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008552166934189405, + "loss": 2.225, + "theoretical_loss": 3.9094505669884168, + "tokens_seen": 506462208 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008548956661316212, + "loss": 2.2071, + "theoretical_loss": 3.9085818316421945, + "tokens_seen": 507510784 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.014119972474873066, + "objective/train/docs_used": 298304, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.6849045753479, + "objective/train/original_loss": 4.6849045753479, + "objective/train/theoretical_loss": 3.9082566480052314, + "objective/train/tokens_used": 528364000, + "objective/train/value_avg": -0.0210113525390625, + "objective/train/value_loss": 0.0018183434149250388, + "objective/train/value_max": -0.002471923828125, + "objective/train/value_min": -0.1444091796875, + "objective/train/value_reward_corr": 0.1163527410881477, + "objective/train/value_std": 0.01416778564453125, + "objective/train/weight_avg": 1.0014210939407349, + "objective/train/weighted_lm_loss": 4.691549301147461, + "objective/train/weights_max": 1.014484167098999, + "objective/train/weights_min": 0.9409686326980591, + "theoretical_loss": 3.9082566480052314, + "tokens_seen": 507904000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008545746388443018, + "loss": 2.2234, + "theoretical_loss": 3.9077153907452367, + "tokens_seen": 508559360 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.004487760365009308, + "objective/train/docs_used": 298986, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.59855842590332, + "objective/train/original_loss": 4.59855842590332, + "objective/train/theoretical_loss": 3.906905176649909, + "objective/train/tokens_used": 530002400, + "objective/train/value_avg": -0.0191497802734375, + "objective/train/value_loss": 0.0037381600122898817, + "objective/train/value_max": -0.002208709716796875, + "objective/train/value_min": -0.12408447265625, + "objective/train/value_reward_corr": 0.3386379885727804, + "objective/train/value_std": 0.01366424560546875, + "objective/train/weight_avg": 1.0004671812057495, + "objective/train/weighted_lm_loss": 4.600886821746826, + "objective/train/weights_max": 1.012028455734253, + "objective/train/weights_min": 0.9355151653289795, + "theoretical_loss": 3.906905176649909, + "tokens_seen": 509542400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008542536115569824, + "loss": 2.2342, + "theoretical_loss": 3.9068512335254724, + "tokens_seen": 509607936 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008539325842696629, + "loss": 2.2212, + "theoretical_loss": 3.905989349283435, + "tokens_seen": 510656512 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0115969218313694, + "objective/train/docs_used": 299996, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.423476219177246, + "objective/train/original_loss": 4.423475742340088, + "objective/train/theoretical_loss": 3.9055592562051764, + "objective/train/tokens_used": 531640800, + "objective/train/value_avg": -0.020233154296875, + "objective/train/value_loss": 0.001153065008111298, + "objective/train/value_max": -0.0027141571044921875, + "objective/train/value_min": -0.169677734375, + "objective/train/value_reward_corr": 0.2534466166957603, + "objective/train/value_std": 0.01357269287109375, + "objective/train/weight_avg": 1.0011653900146484, + "objective/train/weighted_lm_loss": 4.428945541381836, + "objective/train/weights_max": 1.0145628452301025, + "objective/train/weights_min": 0.9475721120834351, + "theoretical_loss": 3.9055592562051764, + "tokens_seen": 511180800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008536115569823435, + "loss": 2.238, + "theoretical_loss": 3.9051297273916257, + "tokens_seen": 511705088 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008532905296950241, + "loss": 2.2009, + "theoretical_loss": 3.9042723572938836, + "tokens_seen": 512753664 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.014041183516383171, + "objective/train/docs_used": 300643, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.457642078399658, + "objective/train/original_loss": 4.457642555236816, + "objective/train/theoretical_loss": 3.904218846189645, + "objective/train/tokens_used": 533279200, + "objective/train/value_avg": -0.0214691162109375, + "objective/train/value_loss": 0.001483121537603438, + "objective/train/value_max": -0.0032596588134765625, + "objective/train/value_min": -0.2113037109375, + "objective/train/value_reward_corr": 0.29387885750745074, + "objective/train/value_std": 0.023773193359375, + "objective/train/weight_avg": 1.0014115571975708, + "objective/train/weighted_lm_loss": 4.464019298553467, + "objective/train/weights_max": 1.0195393562316895, + "objective/train/weights_min": 0.9356825947761536, + "theoretical_loss": 3.904218846189645, + "tokens_seen": 512819200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008529695024077046, + "loss": 2.2307, + "theoretical_loss": 3.9034172285047597, + "tokens_seen": 513802240 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.013101750984787941, + "objective/train/docs_used": 301818, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.083953857421875, + "objective/train/original_loss": 4.083953857421875, + "objective/train/theoretical_loss": 3.9028839065452745, + "objective/train/tokens_used": 534917600, + "objective/train/value_avg": -0.021759033203125, + "objective/train/value_loss": 0.0009387825266458094, + "objective/train/value_max": -0.0026416778564453125, + "objective/train/value_min": -0.1685791015625, + "objective/train/value_reward_corr": 0.3811339078312896, + "objective/train/value_std": 0.01690673828125, + "objective/train/weight_avg": 1.0013148784637451, + "objective/train/weighted_lm_loss": 4.088780403137207, + "objective/train/weights_max": 1.0130985975265503, + "objective/train/weights_min": 0.9737159013748169, + "theoretical_loss": 3.9028839065452745, + "tokens_seen": 514457600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008526484751203852, + "loss": 2.191, + "theoretical_loss": 3.902564330608904, + "tokens_seen": 514850816 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008523274478330658, + "loss": 2.1873, + "theoretical_loss": 3.901713653260452, + "tokens_seen": 515899392 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.006641737651079893, + "objective/train/docs_used": 302480, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.264853477478027, + "objective/train/original_loss": 4.264853000640869, + "objective/train/theoretical_loss": 3.901554397631614, + "objective/train/tokens_used": 536556000, + "objective/train/value_avg": -0.025299072265625, + "objective/train/value_loss": 0.003232681192457676, + "objective/train/value_max": -0.0026416778564453125, + "objective/train/value_min": -0.1865234375, + "objective/train/value_reward_corr": 0.4619456862607618, + "objective/train/value_std": 0.0240936279296875, + "objective/train/weight_avg": 1.0006800889968872, + "objective/train/weighted_lm_loss": 4.268704414367676, + "objective/train/weights_max": 1.0114229917526245, + "objective/train/weights_min": 0.9144641160964966, + "theoretical_loss": 3.901554397631614, + "tokens_seen": 516096000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008520064205457464, + "loss": 2.1997, + "theoretical_loss": 3.900865186182421, + "tokens_seen": 516947968 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": -0.0014388379640877247, + "objective/train/docs_used": 303675, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.347054958343506, + "objective/train/original_loss": 4.347054958343506, + "objective/train/theoretical_loss": 3.9002302802201427, + "objective/train/tokens_used": 538194400, + "objective/train/value_avg": -0.020660400390625, + "objective/train/value_loss": 0.009405343793332577, + "objective/train/value_max": -0.002323150634765625, + "objective/train/value_min": -0.1171875, + "objective/train/value_reward_corr": 0.23130832933572004, + "objective/train/value_std": 0.01364898681640625, + "objective/train/weight_avg": 0.9999021291732788, + "objective/train/weighted_lm_loss": 4.3456621170043945, + "objective/train/weights_max": 1.0117244720458984, + "objective/train/weights_min": 0.9094386100769043, + "theoretical_loss": 3.9002302802201427, + "tokens_seen": 517734400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008516853932584269, + "loss": 2.2064, + "theoretical_loss": 3.9000189191661163, + "tokens_seen": 517996544 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008513643659711075, + "loss": 2.1757, + "theoretical_loss": 3.8991748420705363, + "tokens_seen": 519045120 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.006454078946262598, + "objective/train/docs_used": 304298, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.173398494720459, + "objective/train/original_loss": 4.173398494720459, + "objective/train/theoretical_loss": 3.8989115154886997, + "objective/train/tokens_used": 539832800, + "objective/train/value_avg": -0.0223541259765625, + "objective/train/value_loss": 0.0055063217878341675, + "objective/train/value_max": -0.0027790069580078125, + "objective/train/value_min": -0.2064208984375, + "objective/train/value_reward_corr": 0.12249968502285213, + "objective/train/value_std": 0.020477294921875, + "objective/train/weight_avg": 1.000672459602356, + "objective/train/weighted_lm_loss": 4.175512790679932, + "objective/train/weights_max": 1.017486572265625, + "objective/train/weights_min": 0.9219443202018738, + "theoretical_loss": 3.8989115154886997, + "tokens_seen": 519372800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008510433386837882, + "loss": 2.2277, + "theoretical_loss": 3.8983329448217905, + "tokens_seen": 520093696 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.01179590169340372, + "objective/train/docs_used": 305033, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.749757289886475, + "objective/train/original_loss": 4.749757766723633, + "objective/train/theoretical_loss": 3.8975980650160067, + "objective/train/tokens_used": 541471200, + "objective/train/value_avg": -0.0171661376953125, + "objective/train/value_loss": 0.00105331523809582, + "objective/train/value_max": -0.0027256011962890625, + "objective/train/value_min": -0.10858154296875, + "objective/train/value_reward_corr": 0.12526010928009237, + "objective/train/value_std": 0.01044464111328125, + "objective/train/weight_avg": 1.0011848211288452, + "objective/train/weighted_lm_loss": 4.755256652832031, + "objective/train/weights_max": 1.0108404159545898, + "objective/train/weights_min": 0.9487477540969849, + "theoretical_loss": 3.8975980650160067, + "tokens_seen": 521011200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008507223113964688, + "loss": 2.2009, + "theoretical_loss": 3.8974932174125194, + "tokens_seen": 521142272 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008504012841091493, + "loss": 2.1901, + "theoretical_loss": 3.896655649901324, + "tokens_seen": 522190848 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.007774941623210907, + "objective/train/docs_used": 306243, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.601417541503906, + "objective/train/original_loss": 4.601417541503906, + "objective/train/theoretical_loss": 3.8962898907762877, + "objective/train/tokens_used": 543109600, + "objective/train/value_avg": -0.02239990234375, + "objective/train/value_loss": 0.003927403595298529, + "objective/train/value_max": -0.0025119781494140625, + "objective/train/value_min": -0.2044677734375, + "objective/train/value_reward_corr": 0.3167381949750215, + "objective/train/value_std": 0.020111083984375, + "objective/train/weight_avg": 1.0007967948913574, + "objective/train/weighted_lm_loss": 4.605199337005615, + "objective/train/weights_max": 1.0192049741744995, + "objective/train/weights_min": 0.9242880344390869, + "theoretical_loss": 3.8962898907762877, + "tokens_seen": 522649600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008500802568218299, + "loss": 2.1589, + "theoretical_loss": 3.8958202324121984, + "tokens_seen": 523239424 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0078981202095747, + "objective/train/docs_used": 306972, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.519934177398682, + "objective/train/original_loss": 4.519933223724365, + "objective/train/theoretical_loss": 3.8949869551339704, + "objective/train/tokens_used": 544748000, + "objective/train/value_avg": -0.0293121337890625, + "objective/train/value_loss": 0.006940190214663744, + "objective/train/value_max": -0.0027370452880859375, + "objective/train/value_min": -0.47314453125, + "objective/train/value_reward_corr": 0.3543104864431124, + "objective/train/value_std": 0.04559326171875, + "objective/train/weight_avg": 1.000823974609375, + "objective/train/weighted_lm_loss": 4.523253440856934, + "objective/train/weights_max": 1.0386154651641846, + "objective/train/weights_min": 0.9111344814300537, + "theoretical_loss": 3.8949869551339704, + "tokens_seen": 524288000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008497592295345105, + "loss": 2.2051, + "theoretical_loss": 3.8949869551339704, + "tokens_seen": 524288000 + }, + { + "epoch": 0.16, + "learning_rate": 0.000849438202247191, + "loss": 2.2073, + "theoretical_loss": 3.8941558083197467, + "tokens_seen": 525336576 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.011066360399127007, + "objective/train/docs_used": 308263, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.58984375, + "objective/train/original_loss": 4.589844703674316, + "objective/train/theoretical_loss": 3.8936892208384775, + "objective/train/tokens_used": 546386400, + "objective/train/value_avg": -0.0174713134765625, + "objective/train/value_loss": 0.001554504968225956, + "objective/train/value_max": -0.0029125213623046875, + "objective/train/value_min": -0.10687255859375, + "objective/train/value_reward_corr": 0.08611056229170547, + "objective/train/value_std": 0.0104522705078125, + "objective/train/weight_avg": 1.0011143684387207, + "objective/train/weighted_lm_loss": 4.595177173614502, + "objective/train/weights_max": 1.010162353515625, + "objective/train/weights_min": 0.9308030605316162, + "theoretical_loss": 3.8936892208384775, + "tokens_seen": 525926400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008491171749598716, + "loss": 2.191, + "theoretical_loss": 3.8933267822863646, + "tokens_seen": 526385152 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008487961476725522, + "loss": 2.1754, + "theoretical_loss": 3.8924998674138487, + "tokens_seen": 527433728 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": -0.008016165345907211, + "objective/train/docs_used": 309026, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.179019927978516, + "objective/train/original_loss": 4.179018497467041, + "objective/train/theoretical_loss": 3.892396651019104, + "objective/train/tokens_used": 548024800, + "objective/train/value_avg": -0.04132080078125, + "objective/train/value_loss": 0.014153994619846344, + "objective/train/value_max": -0.0029125213623046875, + "objective/train/value_min": -0.80908203125, + "objective/train/value_reward_corr": 0.8497587670028004, + "objective/train/value_std": 0.09881591796875, + "objective/train/weight_avg": 0.9992678165435791, + "objective/train/weighted_lm_loss": 4.179659843444824, + "objective/train/weights_max": 1.0740007162094116, + "objective/train/weights_min": 0.9090810418128967, + "theoretical_loss": 3.892396651019104, + "tokens_seen": 527564800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008484751203852328, + "loss": 2.1893, + "theoretical_loss": 3.8916750541448764, + "tokens_seen": 528482304 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.007499220315366983, + "objective/train/docs_used": 310459, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.090631484985352, + "objective/train/original_loss": 4.090631484985352, + "objective/train/theoretical_loss": 3.8911092091799793, + "objective/train/tokens_used": 549663200, + "objective/train/value_avg": -0.033355712890625, + "objective/train/value_loss": 0.01235093455761671, + "objective/train/value_max": -0.0025806427001953125, + "objective/train/value_min": -0.325439453125, + "objective/train/value_reward_corr": 0.276282996097151, + "objective/train/value_std": 0.034027099609375, + "objective/train/weight_avg": 1.0008105039596558, + "objective/train/weighted_lm_loss": 4.093561172485352, + "objective/train/weights_max": 1.0317844152450562, + "objective/train/weights_min": 0.9089686870574951, + "theoretical_loss": 3.8911092091799793, + "tokens_seen": 529203200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008481540930979133, + "loss": 2.1397, + "theoretical_loss": 3.890852332984242, + "tokens_seen": 529530880 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008478330658105939, + "loss": 2.1743, + "theoretical_loss": 3.890031694498337, + "tokens_seen": 530579456 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.005142169538885355, + "objective/train/docs_used": 311126, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.9897894859313965, + "objective/train/original_loss": 4.989789962768555, + "objective/train/theoretical_loss": 3.889826859195108, + "objective/train/tokens_used": 551301600, + "objective/train/value_avg": -0.01861572265625, + "objective/train/value_loss": 0.006360157392919064, + "objective/train/value_max": -0.0021076202392578125, + "objective/train/value_min": -0.1246337890625, + "objective/train/value_reward_corr": 0.16583430199825347, + "objective/train/value_std": 0.0150909423828125, + "objective/train/weight_avg": 1.0005452632904053, + "objective/train/weighted_lm_loss": 4.992581367492676, + "objective/train/weights_max": 1.012369990348816, + "objective/train/weights_min": 0.9083571434020996, + "theoretical_loss": 3.889826859195108, + "tokens_seen": 530841600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008475120385232745, + "loss": 2.1428, + "theoretical_loss": 3.8892131293146237, + "tokens_seen": 531628032 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": -0.014681972563266754, + "objective/train/docs_used": 312276, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.21794319152832, + "objective/train/original_loss": 4.21794319152832, + "objective/train/theoretical_loss": 3.888549565303496, + "objective/train/tokens_used": 552940000, + "objective/train/value_avg": -0.0250396728515625, + "objective/train/value_loss": 0.025772523134946823, + "objective/train/value_max": -0.00391387939453125, + "objective/train/value_min": -0.1431884765625, + "objective/train/value_reward_corr": 0.09761736661139396, + "objective/train/value_std": 0.0128326416015625, + "objective/train/weight_avg": 0.9986573457717896, + "objective/train/weighted_lm_loss": 4.210810661315918, + "objective/train/weights_max": 1.014011025428772, + "objective/train/weights_min": 0.9069389700889587, + "theoretical_loss": 3.888549565303496, + "tokens_seen": 532480000 + }, + { + "epoch": 0.16, + "learning_rate": 0.000847191011235955, + "loss": 2.1534, + "theoretical_loss": 3.888396628121124, + "tokens_seen": 532676608 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008468699839486357, + "loss": 2.1977, + "theoretical_loss": 3.887582181665909, + "tokens_seen": 533725184 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.01397934090346098, + "objective/train/docs_used": 312792, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.787879943847656, + "objective/train/original_loss": 4.787879943847656, + "objective/train/theoretical_loss": 3.887277292104349, + "objective/train/tokens_used": 554578400, + "objective/train/value_avg": -0.0240936279296875, + "objective/train/value_loss": 0.005117475520819426, + "objective/train/value_max": -0.0035648345947265625, + "objective/train/value_min": -0.143798828125, + "objective/train/value_reward_corr": 0.23916639351301833, + "objective/train/value_std": 0.0158233642578125, + "objective/train/weight_avg": 1.0014231204986572, + "objective/train/weighted_lm_loss": 4.795126914978027, + "objective/train/weights_max": 1.0142654180526733, + "objective/train/weights_min": 0.9081979990005493, + "theoretical_loss": 3.887277292104349, + "tokens_seen": 534118400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008465489566613163, + "loss": 2.1558, + "theoretical_loss": 3.8867697807565937, + "tokens_seen": 534773760 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.012564142234623432, + "objective/train/docs_used": 314102, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.177690029144287, + "objective/train/original_loss": 4.177690029144287, + "objective/train/theoretical_loss": 3.8860100045523582, + "objective/train/tokens_used": 556216800, + "objective/train/value_avg": -0.0355224609375, + "objective/train/value_loss": 0.0036838401574641466, + "objective/train/value_max": -0.0018100738525390625, + "objective/train/value_min": -0.445068359375, + "objective/train/value_reward_corr": 0.6099621607025596, + "objective/train/value_std": 0.0614013671875, + "objective/train/weight_avg": 1.001274585723877, + "objective/train/weighted_lm_loss": 4.183071136474609, + "objective/train/weights_max": 1.036392331123352, + "objective/train/weights_min": 0.9065892696380615, + "theoretical_loss": 3.8860100045523582, + "tokens_seen": 535756800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008462279293739969, + "loss": 2.2091, + "theoretical_loss": 3.8859594162598396, + "tokens_seen": 535822336 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008459069020866774, + "loss": 2.1622, + "theoretical_loss": 3.8851510791008588, + "tokens_seen": 536870912 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.009344536811113358, + "objective/train/docs_used": 314596, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.544429302215576, + "objective/train/original_loss": 4.544428825378418, + "objective/train/theoretical_loss": 3.884747667953053, + "objective/train/tokens_used": 557855200, + "objective/train/value_avg": -0.0191650390625, + "objective/train/value_loss": 0.004685629159212112, + "objective/train/value_max": -0.0023326873779296875, + "objective/train/value_min": -0.137451171875, + "objective/train/value_reward_corr": 0.11339866817219046, + "objective/train/value_std": 0.01275634765625, + "objective/train/weight_avg": 1.0009573698043823, + "objective/train/weighted_lm_loss": 4.54789924621582, + "objective/train/weights_max": 1.0115092992782593, + "objective/train/weights_min": 0.908645749092102, + "theoretical_loss": 3.884747667953053, + "tokens_seen": 537395200 + }, + { + "epoch": 0.16, + "learning_rate": 0.000845585874799358, + "loss": 2.1621, + "theoretical_loss": 3.8843447602629277, + "tokens_seen": 537919488 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008452648475120386, + "loss": 2.1265, + "theoretical_loss": 3.883540450786901, + "tokens_seen": 538968064 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.00030250492272898555, + "objective/train/docs_used": 315427, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.374370098114014, + "objective/train/original_loss": 4.374370098114014, + "objective/train/theoretical_loss": 3.8834902479582314, + "objective/train/tokens_used": 559493600, + "objective/train/value_avg": -0.021697998046875, + "objective/train/value_loss": 0.004765336401760578, + "objective/train/value_max": -0.0019121170043945312, + "objective/train/value_min": -0.1629638671875, + "objective/train/value_reward_corr": 0.36566564032375126, + "objective/train/value_std": 0.0192413330078125, + "objective/train/weight_avg": 1.0000536441802979, + "objective/train/weighted_lm_loss": 4.374495029449463, + "objective/train/weights_max": 1.0116437673568726, + "objective/train/weights_min": 0.9097689390182495, + "theoretical_loss": 3.8834902479582314, + "tokens_seen": 539033600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008449438202247191, + "loss": 2.1764, + "theoretical_loss": 3.8827381417707327, + "tokens_seen": 540016640 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.012023418210446835, + "objective/train/docs_used": 316530, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.0470380783081055, + "objective/train/original_loss": 4.0470380783081055, + "objective/train/theoretical_loss": 3.8822377105614674, + "objective/train/tokens_used": 561132000, + "objective/train/value_avg": -0.017303466796875, + "objective/train/value_loss": 0.0008167194318957627, + "objective/train/value_max": -0.002132415771484375, + "objective/train/value_min": -0.12127685546875, + "objective/train/value_reward_corr": 0.16192798082614343, + "objective/train/value_std": 0.01160430908203125, + "objective/train/weight_avg": 1.001206398010254, + "objective/train/weighted_lm_loss": 4.051180839538574, + "objective/train/weights_max": 1.0121172666549683, + "objective/train/weights_min": 0.951406717300415, + "theoretical_loss": 3.8822377105614674, + "tokens_seen": 540672000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008446227929373997, + "loss": 2.1278, + "theoretical_loss": 3.8819378243690044, + "tokens_seen": 541065216 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008443017656500803, + "loss": 2.1341, + "theoretical_loss": 3.881139489792454, + "tokens_seen": 542113792 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.015202369540929794, + "objective/train/docs_used": 317270, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.288248538970947, + "objective/train/original_loss": 4.2882490158081055, + "objective/train/theoretical_loss": 3.880990022093684, + "objective/train/tokens_used": 562770400, + "objective/train/value_avg": -0.034210205078125, + "objective/train/value_loss": 0.005052571184933186, + "objective/train/value_max": -0.0021915435791015625, + "objective/train/value_min": -0.318115234375, + "objective/train/value_reward_corr": 0.5115278337805922, + "objective/train/value_std": 0.044189453125, + "objective/train/weight_avg": 1.0015453100204468, + "objective/train/weighted_lm_loss": 4.294933319091797, + "objective/train/weights_max": 1.0240323543548584, + "objective/train/weights_min": 0.9184608459472656, + "theoretical_loss": 3.880990022093684, + "tokens_seen": 542310400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008439807383627609, + "loss": 2.1238, + "theoretical_loss": 3.880343129307512, + "tokens_seen": 543162368 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.011693338863551617, + "objective/train/docs_used": 318285, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.179747581481934, + "objective/train/original_loss": 4.179747104644775, + "objective/train/theoretical_loss": 3.8797471492187987, + "objective/train/tokens_used": 564408800, + "objective/train/value_avg": -0.032379150390625, + "objective/train/value_loss": 0.006105293054133654, + "objective/train/value_max": -0.0023975372314453125, + "objective/train/value_min": -0.275390625, + "objective/train/value_reward_corr": 0.315494582434237, + "objective/train/value_std": 0.032440185546875, + "objective/train/weight_avg": 1.0011992454528809, + "objective/train/weighted_lm_loss": 4.1844024658203125, + "objective/train/weights_max": 1.0267901420593262, + "objective/train/weights_min": 0.9063621163368225, + "theoretical_loss": 3.8797471492187987, + "tokens_seen": 543948800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008436597110754414, + "loss": 2.1294, + "theoretical_loss": 3.879548734235843, + "tokens_seen": 544210944 + }, + { + "epoch": 0.17, + "learning_rate": 0.000843338683788122, + "loss": 2.1396, + "theoretical_loss": 3.878756295953889, + "tokens_seen": 545259520 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.004703739192336798, + "objective/train/docs_used": 318961, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.284043312072754, + "objective/train/original_loss": 4.284043312072754, + "objective/train/theoretical_loss": 3.878509058929443, + "objective/train/tokens_used": 566047200, + "objective/train/value_avg": -0.0216827392578125, + "objective/train/value_loss": 0.014929482713341713, + "objective/train/value_max": -0.002132415771484375, + "objective/train/value_min": -0.11553955078125, + "objective/train/value_reward_corr": 0.1853285889460185, + "objective/train/value_std": 0.0176544189453125, + "objective/train/weight_avg": 0.9996023178100586, + "objective/train/weighted_lm_loss": 4.280591011047363, + "objective/train/weights_max": 1.0107765197753906, + "objective/train/weights_min": 0.9072632193565369, + "theoretical_loss": 3.878509058929443, + "tokens_seen": 545587200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008430176565008026, + "loss": 2.1761, + "theoretical_loss": 3.87796580589242, + "tokens_seen": 546308096 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.006125511601567268, + "objective/train/docs_used": 320353, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.605869770050049, + "objective/train/original_loss": 4.605869293212891, + "objective/train/theoretical_loss": 3.877275718542742, + "objective/train/tokens_used": 567685600, + "objective/train/value_avg": -0.0202789306640625, + "objective/train/value_loss": 0.003352983621880412, + "objective/train/value_max": -0.002124786376953125, + "objective/train/value_min": -0.1270751953125, + "objective/train/value_reward_corr": 0.27012056919206856, + "objective/train/value_std": 0.015167236328125, + "objective/train/weight_avg": 1.0006290674209595, + "objective/train/weighted_lm_loss": 4.609113693237305, + "objective/train/weights_max": 1.012403130531311, + "objective/train/weights_min": 0.9079061150550842, + "theoretical_loss": 3.877275718542742, + "tokens_seen": 547225600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008426966292134832, + "loss": 2.1319, + "theoretical_loss": 3.877177255536089, + "tokens_seen": 547356672 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008423756019261637, + "loss": 2.1297, + "theoretical_loss": 3.8763906364229888, + "tokens_seen": 548405248 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.00022493260621558875, + "objective/train/docs_used": 320989, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.216316223144531, + "objective/train/original_loss": 4.216316223144531, + "objective/train/theoretical_loss": 3.8760470956961655, + "objective/train/tokens_used": 569324000, + "objective/train/value_avg": -0.020233154296875, + "objective/train/value_loss": 0.012074154801666737, + "objective/train/value_max": -0.0023975372314453125, + "objective/train/value_min": -0.1607666015625, + "objective/train/value_reward_corr": 0.18656973340579555, + "objective/train/value_std": 0.015167236328125, + "objective/train/weight_avg": 1.0000362396240234, + "objective/train/weighted_lm_loss": 4.214787006378174, + "objective/train/weights_max": 1.014664888381958, + "objective/train/weights_min": 0.9087254405021667, + "theoretical_loss": 3.8760470956961655, + "tokens_seen": 548864000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008420545746388443, + "loss": 2.0773, + "theoretical_loss": 3.875605940144217, + "tokens_seen": 549453824 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.004572282545268536, + "objective/train/docs_used": 322374, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9459383487701416, + "objective/train/original_loss": 3.9459383487701416, + "objective/train/theoretical_loss": 3.8748231583434425, + "objective/train/tokens_used": 570962400, + "objective/train/value_avg": -0.02374267578125, + "objective/train/value_loss": 0.00432850606739521, + "objective/train/value_max": -0.00229644775390625, + "objective/train/value_min": -0.11029052734375, + "objective/train/value_reward_corr": 0.18547074054402812, + "objective/train/value_std": 0.014923095703125, + "objective/train/weight_avg": 1.0004785060882568, + "objective/train/weighted_lm_loss": 3.9492201805114746, + "objective/train/weights_max": 1.0099252462387085, + "objective/train/weights_min": 0.9096171855926514, + "theoretical_loss": 3.8748231583434425, + "tokens_seen": 550502400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008417335473515249, + "loss": 2.0937, + "theoretical_loss": 3.8748231583434425, + "tokens_seen": 550502400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008414125200642054, + "loss": 2.1124, + "theoretical_loss": 3.8740422827164784, + "tokens_seen": 551550976 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.003507553366944194, + "objective/train/docs_used": 322945, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.354035377502441, + "objective/train/original_loss": 4.354034900665283, + "objective/train/theoretical_loss": 3.8736038747505406, + "objective/train/tokens_used": 572600800, + "objective/train/value_avg": -0.0273590087890625, + "objective/train/value_loss": 0.010962801985442638, + "objective/train/value_max": -0.0022602081298828125, + "objective/train/value_min": -0.270263671875, + "objective/train/value_reward_corr": 0.2509957790471412, + "objective/train/value_std": 0.034576416015625, + "objective/train/weight_avg": 1.0004042387008667, + "objective/train/weighted_lm_loss": 4.354303359985352, + "objective/train/weights_max": 1.0219426155090332, + "objective/train/weights_min": 0.9079451560974121, + "theoretical_loss": 3.8736038747505406, + "tokens_seen": 552140800 + }, + { + "epoch": 0.17, + "learning_rate": 0.000841091492776886, + "loss": 2.1126, + "theoretical_loss": 3.873263305010858, + "tokens_seen": 552599552 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008407704654895666, + "loss": 2.1223, + "theoretical_loss": 3.872486217025413, + "tokens_seen": 553648128 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.0026698471046984196, + "objective/train/docs_used": 324208, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.985201597213745, + "objective/train/original_loss": 3.9852020740509033, + "objective/train/theoretical_loss": 3.872389213491709, + "objective/train/tokens_used": 574239200, + "objective/train/value_avg": -0.03314208984375, + "objective/train/value_loss": 0.012698594480752945, + "objective/train/value_max": -0.0024242401123046875, + "objective/train/value_min": -0.341796875, + "objective/train/value_reward_corr": 0.4235575016476783, + "objective/train/value_std": 0.050750732421875, + "objective/train/weight_avg": 0.9997951984405518, + "objective/train/weighted_lm_loss": 3.9839985370635986, + "objective/train/weights_max": 1.0286000967025757, + "objective/train/weights_min": 0.9058116674423218, + "theoretical_loss": 3.872389213491709, + "tokens_seen": 553779200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008404494382022472, + "loss": 2.0994, + "theoretical_loss": 3.8717110106098627, + "tokens_seen": 554696704 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.00851005595177412, + "objective/train/docs_used": 324930, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.687680244445801, + "objective/train/original_loss": 4.687679290771484, + "objective/train/theoretical_loss": 3.8711791434455787, + "objective/train/tokens_used": 575877600, + "objective/train/value_avg": -0.0169830322265625, + "objective/train/value_loss": 0.0025041699409484863, + "objective/train/value_max": -0.002704620361328125, + "objective/train/value_min": -0.1510009765625, + "objective/train/value_reward_corr": 0.26527233986454196, + "objective/train/value_std": 0.0137786865234375, + "objective/train/weight_avg": 1.0008633136749268, + "objective/train/weighted_lm_loss": 4.691444396972656, + "objective/train/weights_max": 1.0126286745071411, + "objective/train/weights_min": 0.9070209264755249, + "theoretical_loss": 3.8711791434455787, + "tokens_seen": 555417600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008401284109149277, + "loss": 2.1188, + "theoretical_loss": 3.870937677664398, + "tokens_seen": 555745280 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008398073836276083, + "loss": 2.0798, + "theoretical_loss": 3.870166210139278, + "tokens_seen": 556793856 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.012440879829227924, + "objective/train/docs_used": 326252, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.151572227478027, + "objective/train/original_loss": 4.1515727043151855, + "objective/train/theoretical_loss": 3.869973633791332, + "objective/train/tokens_used": 577516000, + "objective/train/value_avg": -0.038421630859375, + "objective/train/value_loss": 0.0065834252163767815, + "objective/train/value_max": -0.002368927001953125, + "objective/train/value_min": -0.416015625, + "objective/train/value_reward_corr": 0.5759957171259185, + "objective/train/value_std": 0.062103271484375, + "objective/train/weight_avg": 1.0012767314910889, + "objective/train/weighted_lm_loss": 4.157973289489746, + "objective/train/weights_max": 1.0360215902328491, + "objective/train/weights_min": 0.9218615889549255, + "theoretical_loss": 3.869973633791332, + "tokens_seen": 557056000 + }, + { + "epoch": 0.17, + "learning_rate": 0.000839486356340289, + "loss": 2.1098, + "theoretical_loss": 3.8693966000344253, + "tokens_seen": 557842432 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0018988207448273897, + "objective/train/docs_used": 326886, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.776794910430908, + "objective/train/original_loss": 3.776794910430908, + "objective/train/theoretical_loss": 3.8687726540049234, + "objective/train/tokens_used": 579154400, + "objective/train/value_avg": -0.02288818359375, + "objective/train/value_loss": 0.007520005572587252, + "objective/train/value_max": -0.001949310302734375, + "objective/train/value_min": -0.2452392578125, + "objective/train/value_reward_corr": 0.4574822477674453, + "objective/train/value_std": 0.0259552001953125, + "objective/train/weight_avg": 1.0002268552780151, + "objective/train/weighted_lm_loss": 3.7781412601470947, + "objective/train/weights_max": 1.021052360534668, + "objective/train/weights_min": 0.9133797287940979, + "theoretical_loss": 3.8687726540049234, + "tokens_seen": 558694400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008391653290529694, + "loss": 2.0601, + "theoretical_loss": 3.868628839399026, + "tokens_seen": 558891008 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008388443017656501, + "loss": 2.0706, + "theoretical_loss": 3.8678629203311368, + "tokens_seen": 559939584 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0034994531888514757, + "objective/train/docs_used": 327930, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.051071643829346, + "objective/train/original_loss": 4.051072597503662, + "objective/train/theoretical_loss": 3.8675761738553596, + "objective/train/tokens_used": 580792800, + "objective/train/value_avg": -0.017578125, + "objective/train/value_loss": 0.007838908582925797, + "objective/train/value_max": -0.002620697021484375, + "objective/train/value_min": -0.14111328125, + "objective/train/value_reward_corr": 0.28866242442267814, + "objective/train/value_std": 0.01192474365234375, + "objective/train/weight_avg": 1.0003881454467773, + "objective/train/weighted_lm_loss": 4.0526018142700195, + "objective/train/weights_max": 1.0114092826843262, + "objective/train/weights_min": 0.9090891480445862, + "theoretical_loss": 3.8675761738553596, + "tokens_seen": 560332800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008385232744783307, + "loss": 2.0626, + "theoretical_loss": 3.8670988349772912, + "tokens_seen": 560988160 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.01213999092578888, + "objective/train/docs_used": 328746, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.648550033569336, + "objective/train/original_loss": 4.648550033569336, + "objective/train/theoretical_loss": 3.866384163401042, + "objective/train/tokens_used": 582431200, + "objective/train/value_avg": -0.0155487060546875, + "objective/train/value_loss": 0.0005900830146856606, + "objective/train/value_max": -0.001926422119140625, + "objective/train/value_min": -0.155029296875, + "objective/train/value_reward_corr": 0.41825096383765614, + "objective/train/value_std": 0.01044464111328125, + "objective/train/weight_avg": 1.001217007637024, + "objective/train/weighted_lm_loss": 4.654037952423096, + "objective/train/weights_max": 1.0099551677703857, + "objective/train/weights_min": 0.9688600301742554, + "theoretical_loss": 3.866384163401042, + "tokens_seen": 561971200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008382022471910113, + "loss": 2.0698, + "theoretical_loss": 3.8663365755321157, + "tokens_seen": 562036736 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008378812199036918, + "loss": 2.0962, + "theoretical_loss": 3.865576134237943, + "tokens_seen": 563085312 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.01502026803791523, + "objective/train/docs_used": 329425, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.342256546020508, + "objective/train/original_loss": 4.34225606918335, + "objective/train/theoretical_loss": 3.8651965929861625, + "objective/train/tokens_used": 584069600, + "objective/train/value_avg": -0.018341064453125, + "objective/train/value_loss": 0.0004695657407864928, + "objective/train/value_max": -0.00217437744140625, + "objective/train/value_min": -0.1163330078125, + "objective/train/value_reward_corr": 0.19873224190020097, + "objective/train/value_std": 0.01236724853515625, + "objective/train/weight_avg": 1.0015043020248413, + "objective/train/weighted_lm_loss": 4.34890604019165, + "objective/train/weights_max": 1.0107529163360596, + "objective/train/weights_min": 0.9772297739982605, + "theoretical_loss": 3.8651965929861625, + "tokens_seen": 563609600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008375601926163724, + "loss": 2.1307, + "theoretical_loss": 3.8648175033844323, + "tokens_seen": 564133888 + }, + { + "epoch": 0.17, + "learning_rate": 0.000837239165329053, + "loss": 2.0811, + "theoretical_loss": 3.8640606753081954, + "tokens_seen": 565182464 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.015949858352541924, + "objective/train/docs_used": 329612, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.054144382476807, + "objective/train/original_loss": 4.054144382476807, + "objective/train/theoretical_loss": 3.864013433237152, + "objective/train/tokens_used": 585708000, + "objective/train/value_avg": -0.026092529296875, + "objective/train/value_loss": 0.0029879792127758265, + "objective/train/value_max": -0.003185272216796875, + "objective/train/value_min": -0.259033203125, + "objective/train/value_reward_corr": 0.2074668384410235, + "objective/train/value_std": 0.02471923828125, + "objective/train/weight_avg": 1.0016096830368042, + "objective/train/weighted_lm_loss": 4.061420440673828, + "objective/train/weights_max": 1.023935079574585, + "objective/train/weights_min": 0.9085753560066223, + "theoretical_loss": 3.864013433237152, + "tokens_seen": 565248000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008369181380417335, + "loss": 2.1387, + "theoretical_loss": 3.8633056423924232, + "tokens_seen": 566231040 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.010449369437992573, + "objective/train/docs_used": 331071, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.274148464202881, + "objective/train/original_loss": 4.274148941040039, + "objective/train/theoretical_loss": 3.8628346550591868, + "objective/train/tokens_used": 587346400, + "objective/train/value_avg": -0.01763916015625, + "objective/train/value_loss": 0.0012260202784091234, + "objective/train/value_max": -0.0022792816162109375, + "objective/train/value_min": -0.1275634765625, + "objective/train/value_reward_corr": 0.19889430388478208, + "objective/train/value_std": 0.0107269287109375, + "objective/train/weight_avg": 1.0010510683059692, + "objective/train/weighted_lm_loss": 4.27907657623291, + "objective/train/weights_max": 1.0115379095077515, + "objective/train/weights_min": 0.9368029236793518, + "theoretical_loss": 3.8628346550591868, + "tokens_seen": 566886400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008365971107544141, + "loss": 2.118, + "theoretical_loss": 3.8625523970665174, + "tokens_seen": 567279616 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008362760834670947, + "loss": 2.1135, + "theoretical_loss": 3.8618009318057234, + "tokens_seen": 568328192 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.011272966861724854, + "objective/train/docs_used": 331416, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.106100559234619, + "objective/train/original_loss": 4.106100559234619, + "objective/train/theoretical_loss": 3.8616602296327467, + "objective/train/tokens_used": 588984800, + "objective/train/value_avg": -0.0218505859375, + "objective/train/value_loss": 0.0025289456825703382, + "objective/train/value_max": -0.0027256011962890625, + "objective/train/value_min": -0.163818359375, + "objective/train/value_reward_corr": 0.08082439425621972, + "objective/train/value_std": 0.0122528076171875, + "objective/train/weight_avg": 1.0011396408081055, + "objective/train/weighted_lm_loss": 4.110568523406982, + "objective/train/weights_max": 1.0164382457733154, + "objective/train/weights_min": 0.9108185172080994, + "theoretical_loss": 3.8616602296327467, + "tokens_seen": 568524800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008359550561797753, + "loss": 2.1268, + "theoretical_loss": 3.861051239130771, + "tokens_seen": 569376768 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.0035612003412097692, + "objective/train/docs_used": 332819, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.304696083068848, + "objective/train/original_loss": 4.304696083068848, + "objective/train/theoretical_loss": 3.8604901284102264, + "objective/train/tokens_used": 590623200, + "objective/train/value_avg": -0.03271484375, + "objective/train/value_loss": 0.01250394806265831, + "objective/train/value_max": -0.00274658203125, + "objective/train/value_min": -0.325927734375, + "objective/train/value_reward_corr": 0.2075897118014128, + "objective/train/value_std": 0.036956787109375, + "objective/train/weight_avg": 0.9997053742408752, + "objective/train/weighted_lm_loss": 4.306632995605469, + "objective/train/weights_max": 1.0280400514602661, + "objective/train/weights_min": 0.9072619676589966, + "theoretical_loss": 3.8604901284102264, + "tokens_seen": 570163200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008356340288924558, + "loss": 2.1179, + "theoretical_loss": 3.860303311607516, + "tokens_seen": 570425344 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008353130016051365, + "loss": 2.1027, + "theoretical_loss": 3.859557141846584, + "tokens_seen": 571473920 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.013008789159357548, + "objective/train/docs_used": 333556, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.176945686340332, + "objective/train/original_loss": 4.17694616317749, + "objective/train/theoretical_loss": 3.8593243231125958, + "objective/train/tokens_used": 592261600, + "objective/train/value_avg": -0.0184478759765625, + "objective/train/value_loss": 0.0007288225460797548, + "objective/train/value_max": -0.0027256011962890625, + "objective/train/value_min": -0.1480712890625, + "objective/train/value_reward_corr": 0.18005209950277976, + "objective/train/value_std": 0.01308441162109375, + "objective/train/weight_avg": 1.0013046264648438, + "objective/train/weighted_lm_loss": 4.182594299316406, + "objective/train/weights_max": 1.0138894319534302, + "objective/train/weights_min": 0.9754530787467957, + "theoretical_loss": 3.8593243231125958, + "tokens_seen": 571801600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008349919743178171, + "loss": 2.1075, + "theoretical_loss": 3.858812722503022, + "tokens_seen": 572522496 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.002539166249334812, + "objective/train/docs_used": 334721, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.061913967132568, + "objective/train/original_loss": 4.06191349029541, + "objective/train/theoretical_loss": 3.8581627857261136, + "objective/train/tokens_used": 593900000, + "objective/train/value_avg": -0.0157623291015625, + "objective/train/value_loss": 0.005690024700015783, + "objective/train/value_max": -0.00147247314453125, + "objective/train/value_min": -0.09503173828125, + "objective/train/value_reward_corr": 0.09126952383322036, + "objective/train/value_std": 0.01020050048828125, + "objective/train/weight_avg": 1.000281810760498, + "objective/train/weighted_lm_loss": 4.061883449554443, + "objective/train/weights_max": 1.0094815492630005, + "objective/train/weights_min": 0.9174660444259644, + "theoretical_loss": 3.8581627857261136, + "tokens_seen": 573440000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008346709470304977, + "loss": 2.1315, + "theoretical_loss": 3.8580700462759463, + "tokens_seen": 573571072 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008343499197431782, + "loss": 2.1177, + "theoretical_loss": 3.857329105908203, + "tokens_seen": 574619648 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.0006173439905978739, + "objective/train/docs_used": 335233, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.359372138977051, + "objective/train/original_loss": 4.359372138977051, + "objective/train/theoretical_loss": 3.8570054884990883, + "objective/train/tokens_used": 595538400, + "objective/train/value_avg": -0.01806640625, + "objective/train/value_loss": 0.008965814486145973, + "objective/train/value_max": -0.001918792724609375, + "objective/train/value_min": -0.18798828125, + "objective/train/value_reward_corr": 0.2487482925270439, + "objective/train/value_std": 0.01617431640625, + "objective/train/weight_avg": 0.9999821186065674, + "objective/train/weighted_lm_loss": 4.3592047691345215, + "objective/train/weights_max": 1.017514705657959, + "objective/train/weights_min": 0.9105920791625977, + "theoretical_loss": 3.8570054884990883, + "tokens_seen": 575078400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008340288924558588, + "loss": 2.1442, + "theoretical_loss": 3.8565898941860244, + "tokens_seen": 575668224 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.02832317166030407, + "objective/train/docs_used": 335838, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.391421794891357, + "objective/train/original_loss": 4.391422271728516, + "objective/train/theoretical_loss": 3.855852403938689, + "objective/train/tokens_used": 597176800, + "objective/train/value_avg": -0.0214691162109375, + "objective/train/value_loss": 0.038061462342739105, + "objective/train/value_max": -0.0022525787353515625, + "objective/train/value_min": -0.1343994140625, + "objective/train/value_reward_corr": 0.14801607586577442, + "objective/train/value_std": 0.01519012451171875, + "objective/train/weight_avg": 0.997352659702301, + "objective/train/weighted_lm_loss": 4.372139930725098, + "objective/train/weights_max": 1.0108262300491333, + "objective/train/weights_min": 0.9060996174812317, + "theoretical_loss": 3.855852403938689, + "tokens_seen": 576716800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008337078651685394, + "loss": 2.122, + "theoretical_loss": 3.855852403938689, + "tokens_seen": 576716800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008333868378812199, + "loss": 2.1356, + "theoretical_loss": 3.8551166280381928, + "tokens_seen": 577765376 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.002297014929354191, + "objective/train/docs_used": 337192, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.8253583908081055, + "objective/train/original_loss": 4.825357913970947, + "objective/train/theoretical_loss": 3.854703504807805, + "objective/train/tokens_used": 598815200, + "objective/train/value_avg": -0.0243072509765625, + "objective/train/value_loss": 0.007638697978109121, + "objective/train/value_max": -0.00266265869140625, + "objective/train/value_min": -0.1898193359375, + "objective/train/value_reward_corr": 0.3128713338203254, + "objective/train/value_std": 0.0247955322265625, + "objective/train/weight_avg": 1.0002670288085938, + "objective/train/weighted_lm_loss": 4.826428413391113, + "objective/train/weights_max": 1.0173629522323608, + "objective/train/weights_min": 0.9063584804534912, + "theoretical_loss": 3.854703504807805, + "tokens_seen": 578355200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008330658105939005, + "loss": 2.1137, + "theoretical_loss": 3.854382559398911, + "tokens_seen": 578813952 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008327447833065811, + "loss": 2.1681, + "theoretical_loss": 3.8536501909772745, + "tokens_seen": 579862528 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": -0.001082884380593896, + "objective/train/docs_used": 337878, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.649087429046631, + "objective/train/original_loss": 4.649087429046631, + "objective/train/theoretical_loss": 3.8535587641219466, + "objective/train/tokens_used": 600453600, + "objective/train/value_avg": -0.016571044921875, + "objective/train/value_loss": 0.007768101058900356, + "objective/train/value_max": -0.002002716064453125, + "objective/train/value_min": -0.1561279296875, + "objective/train/value_reward_corr": 0.2004813028644463, + "objective/train/value_std": 0.01010894775390625, + "objective/train/weight_avg": 0.9999296069145203, + "objective/train/weighted_lm_loss": 4.648331642150879, + "objective/train/weights_max": 1.0156548023223877, + "objective/train/weights_min": 0.907093346118927, + "theoretical_loss": 3.8535587641219466, + "tokens_seen": 579993600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008324237560192617, + "loss": 2.1127, + "theoretical_loss": 3.852919515771444, + "tokens_seen": 580911104 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0036619326565414667, + "objective/train/docs_used": 339259, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.391587734222412, + "objective/train/original_loss": 4.391586780548096, + "objective/train/theoretical_loss": 3.8524181551462013, + "objective/train/tokens_used": 602092000, + "objective/train/value_avg": -0.0171356201171875, + "objective/train/value_loss": 0.003556064562872052, + "objective/train/value_max": -0.00263214111328125, + "objective/train/value_min": -0.212890625, + "objective/train/value_reward_corr": 0.161169369878161, + "objective/train/value_std": 0.0108642578125, + "objective/train/weight_avg": 1.0003836154937744, + "objective/train/weighted_lm_loss": 4.3945465087890625, + "objective/train/weights_max": 1.0209743976593018, + "objective/train/weights_min": 0.9330379962921143, + "theoretical_loss": 3.8524181551462013, + "tokens_seen": 581632000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008321027287319422, + "loss": 2.1489, + "theoretical_loss": 3.8521905268209857, + "tokens_seen": 581959680 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008317817014446228, + "loss": 2.0708, + "theoretical_loss": 3.851463217206555, + "tokens_seen": 583008256 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.010938407853245735, + "objective/train/docs_used": 340083, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.848914861679077, + "objective/train/original_loss": 3.8489151000976562, + "objective/train/theoretical_loss": 3.8512816513922274, + "objective/train/tokens_used": 603730400, + "objective/train/value_avg": -0.0218353271484375, + "objective/train/value_loss": 0.0018594703869894147, + "objective/train/value_max": -0.0027370452880859375, + "objective/train/value_min": -0.19482421875, + "objective/train/value_reward_corr": 0.22470099726383908, + "objective/train/value_std": 0.01446533203125, + "objective/train/weight_avg": 1.0011030435562134, + "objective/train/weighted_lm_loss": 3.8531322479248047, + "objective/train/weights_max": 1.0133250951766968, + "objective/train/weights_min": 0.9096461534500122, + "theoretical_loss": 3.8512816513922274, + "tokens_seen": 583270400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008314606741573035, + "loss": 2.0996, + "theoretical_loss": 3.85073758004958, + "tokens_seen": 584056832 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.016322694718837738, + "objective/train/docs_used": 341398, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.064023494720459, + "objective/train/original_loss": 4.064023971557617, + "objective/train/theoretical_loss": 3.8501492266152972, + "objective/train/tokens_used": 605368800, + "objective/train/value_avg": -0.021484375, + "objective/train/value_loss": 0.0014752141432836652, + "objective/train/value_max": -0.0021648406982421875, + "objective/train/value_min": -0.1766357421875, + "objective/train/value_reward_corr": 0.09249171606772043, + "objective/train/value_std": 0.01490020751953125, + "objective/train/weight_avg": 1.001639485359192, + "objective/train/weighted_lm_loss": 4.070662021636963, + "objective/train/weights_max": 1.016181468963623, + "objective/train/weights_min": 0.9146255850791931, + "theoretical_loss": 3.8501492266152972, + "tokens_seen": 584908800 + }, + { + "epoch": 0.18, + "learning_rate": 0.000831139646869984, + "loss": 2.0677, + "theoretical_loss": 3.850013608511947, + "tokens_seen": 585105408 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008308186195826646, + "loss": 2.0723, + "theoretical_loss": 3.8492912957956933, + "tokens_seen": 586153984 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.008657315745949745, + "objective/train/docs_used": 342071, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.312041759490967, + "objective/train/original_loss": 4.312041759490967, + "objective/train/theoretical_loss": 3.849020854811377, + "objective/train/tokens_used": 607007200, + "objective/train/value_avg": -0.01548004150390625, + "objective/train/value_loss": 0.0013729914790019393, + "objective/train/value_max": -0.0021572113037109375, + "objective/train/value_min": -0.1258544921875, + "objective/train/value_reward_corr": 0.06407234985858476, + "objective/train/value_std": 0.0101318359375, + "objective/train/weight_avg": 1.0008724927902222, + "objective/train/weighted_lm_loss": 4.315770149230957, + "objective/train/weights_max": 1.0116883516311646, + "objective/train/weights_min": 0.9365071654319763, + "theoretical_loss": 3.849020854811377, + "tokens_seen": 586547200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008304975922953452, + "loss": 2.1029, + "theoretical_loss": 3.848570635142696, + "tokens_seen": 587202560 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0016843685880303383, + "objective/train/docs_used": 342650, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.030447959899902, + "objective/train/original_loss": 4.030447006225586, + "objective/train/theoretical_loss": 3.8478965102142606, + "objective/train/tokens_used": 608645600, + "objective/train/value_avg": -0.018157958984375, + "objective/train/value_loss": 0.007303187623620033, + "objective/train/value_max": -0.0023593902587890625, + "objective/train/value_min": -0.34375, + "objective/train/value_reward_corr": 0.14149239429050037, + "objective/train/value_std": 0.01488494873046875, + "objective/train/weight_avg": 1.0002042055130005, + "objective/train/weighted_lm_loss": 4.030357837677002, + "objective/train/weights_max": 1.0349034070968628, + "objective/train/weights_min": 0.9088878035545349, + "theoretical_loss": 3.8478965102142606, + "tokens_seen": 588185600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008301765650080258, + "loss": 2.0636, + "theoretical_loss": 3.8478516198343717, + "tokens_seen": 588251136 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008298555377207063, + "loss": 2.0787, + "theoretical_loss": 3.847134243191375, + "tokens_seen": 589299712 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": -0.013989568687975407, + "objective/train/docs_used": 343935, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.175821304321289, + "objective/train/original_loss": 4.175821781158447, + "objective/train/theoretical_loss": 3.8467761672927336, + "objective/train/tokens_used": 610284000, + "objective/train/value_avg": -0.017974853515625, + "objective/train/value_loss": 0.020902404561638832, + "objective/train/value_max": -0.0022258758544921875, + "objective/train/value_min": -0.1497802734375, + "objective/train/value_reward_corr": 0.07416381126242012, + "objective/train/value_std": 0.0116119384765625, + "objective/train/weight_avg": 0.9987029433250427, + "objective/train/weighted_lm_loss": 4.1664557456970215, + "objective/train/weights_max": 1.0149606466293335, + "objective/train/weights_min": 0.9073666334152222, + "theoretical_loss": 3.8467761672927336, + "tokens_seen": 589824000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008295345104333869, + "loss": 2.0425, + "theoretical_loss": 3.8464184985732968, + "tokens_seen": 590348288 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008292134831460675, + "loss": 2.0788, + "theoretical_loss": 3.845704379378372, + "tokens_seen": 591396864 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.01006072387099266, + "objective/train/docs_used": 344744, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8700971603393555, + "objective/train/original_loss": 3.8700973987579346, + "objective/train/theoretical_loss": 3.8456598007477893, + "objective/train/tokens_used": 611922400, + "objective/train/value_avg": -0.0161285400390625, + "objective/train/value_loss": 0.00025407009525224566, + "objective/train/value_max": -0.0024051666259765625, + "objective/train/value_min": -0.1336669921875, + "objective/train/value_reward_corr": 0.22329818587550287, + "objective/train/value_std": 0.009552001953125, + "objective/train/weight_avg": 1.001007318496704, + "objective/train/weighted_lm_loss": 3.8743083477020264, + "objective/train/weights_max": 1.0130605697631836, + "objective/train/weights_min": 0.993940532207489, + "theoretical_loss": 3.8456598007477893, + "tokens_seen": 591462400 + }, + { + "epoch": 0.18, + "learning_rate": 0.000828892455858748, + "loss": 2.0788, + "theoretical_loss": 3.8449918790431843, + "tokens_seen": 592445440 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.014712624251842499, + "objective/train/docs_used": 345976, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.297782897949219, + "objective/train/original_loss": 4.297783851623535, + "objective/train/theoretical_loss": 3.844547385509876, + "objective/train/tokens_used": 613560800, + "objective/train/value_avg": -0.03192138671875, + "objective/train/value_loss": 0.004335404373705387, + "objective/train/value_max": -0.0027141571044921875, + "objective/train/value_min": -0.289794921875, + "objective/train/value_reward_corr": 0.3991200301344518, + "objective/train/value_std": 0.04302978515625, + "objective/train/weight_avg": 1.0014926195144653, + "objective/train/weighted_lm_loss": 4.304871559143066, + "objective/train/weights_max": 1.0234148502349854, + "objective/train/weights_min": 0.9226583242416382, + "theoretical_loss": 3.844547385509876, + "tokens_seen": 593100800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008285714285714286, + "loss": 2.0456, + "theoretical_loss": 3.8442809910423783, + "tokens_seen": 593494016 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008282504012841091, + "loss": 2.0614, + "theoretical_loss": 3.8435717088883696, + "tokens_seen": 594542592 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.012646072544157505, + "objective/train/docs_used": 346603, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.061689376831055, + "objective/train/original_loss": 4.061689853668213, + "objective/train/theoretical_loss": 3.8434388967361928, + "objective/train/tokens_used": 615199200, + "objective/train/value_avg": -0.017578125, + "objective/train/value_loss": 0.0005255700089037418, + "objective/train/value_max": -0.00274658203125, + "objective/train/value_min": -0.09893798828125, + "objective/train/value_reward_corr": 0.17985562072553476, + "objective/train/value_std": 0.011383056640625, + "objective/train/weight_avg": 1.0012671947479248, + "objective/train/weighted_lm_loss": 4.067074298858643, + "objective/train/weights_max": 1.0098471641540527, + "objective/train/weights_min": 0.9736138582229614, + "theoretical_loss": 3.8434388967361928, + "tokens_seen": 594739200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008279293739967897, + "loss": 2.0572, + "theoretical_loss": 3.842864026131061, + "tokens_seen": 595591168 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": -1.896463436423801e-05, + "objective/train/docs_used": 348035, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9135286808013916, + "objective/train/original_loss": 3.9135289192199707, + "objective/train/theoretical_loss": 3.8423343098080185, + "objective/train/tokens_used": 616837600, + "objective/train/value_avg": -0.019805908203125, + "objective/train/value_loss": 0.0017524255672469735, + "objective/train/value_max": -0.002758026123046875, + "objective/train/value_min": -0.1519775390625, + "objective/train/value_reward_corr": 0.23296097973412738, + "objective/train/value_std": 0.01171875, + "objective/train/weight_avg": 1.0000067949295044, + "objective/train/weighted_lm_loss": 3.913968563079834, + "objective/train/weights_max": 1.0092403888702393, + "objective/train/weights_min": 0.920685887336731, + "theoretical_loss": 3.8423343098080185, + "tokens_seen": 596377600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008276083467094702, + "loss": 2.0498, + "theoretical_loss": 3.8421579363575615, + "tokens_seen": 596639744 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008272873194221508, + "loss": 2.038, + "theoretical_loss": 3.841453433191904, + "tokens_seen": 597688320 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.006930607836693525, + "objective/train/docs_used": 348629, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.100687026977539, + "objective/train/original_loss": 4.100687026977539, + "objective/train/theoretical_loss": 3.8412336003280814, + "objective/train/tokens_used": 618476000, + "objective/train/value_avg": -0.042724609375, + "objective/train/value_loss": 0.006683968473225832, + "objective/train/value_max": -0.0029125213623046875, + "objective/train/value_min": -0.4091796875, + "objective/train/value_reward_corr": 0.6709495238138796, + "objective/train/value_std": 0.06292724609375, + "objective/train/weight_avg": 1.0007262229919434, + "objective/train/weighted_lm_loss": 4.104631423950195, + "objective/train/weights_max": 1.0367907285690308, + "objective/train/weights_min": 0.9314780235290527, + "theoretical_loss": 3.8412336003280814, + "tokens_seen": 598016000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008269662921348315, + "loss": 1.9949, + "theoretical_loss": 3.8407505102947725, + "tokens_seen": 598736896 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.008757428266108036, + "objective/train/docs_used": 349996, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.987126588821411, + "objective/train/original_loss": 3.987126588821411, + "objective/train/theoretical_loss": 3.8401367441179683, + "objective/train/tokens_used": 620114400, + "objective/train/value_avg": -0.025787353515625, + "objective/train/value_loss": 0.0037563855294138193, + "objective/train/value_max": -0.00260162353515625, + "objective/train/value_min": -0.333984375, + "objective/train/value_reward_corr": 0.4461347589608071, + "objective/train/value_std": 0.0303802490234375, + "objective/train/weight_avg": 1.00089430809021, + "objective/train/weighted_lm_loss": 3.9907405376434326, + "objective/train/weights_max": 1.0301774740219116, + "objective/train/weights_min": 0.9262629747390747, + "theoretical_loss": 3.8401367441179683, + "tokens_seen": 599654400 + }, + { + "epoch": 0.18, + "learning_rate": 0.000826645264847512, + "loss": 2.0271, + "theoretical_loss": 3.840049161363223, + "tokens_seen": 599785472 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008263242375601926, + "loss": 2.025, + "theoretical_loss": 3.839349380130415, + "tokens_seen": 600834048 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.007686337921768427, + "objective/train/docs_used": 350686, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9153785705566406, + "objective/train/original_loss": 3.9153785705566406, + "objective/train/theoretical_loss": 3.839043717215569, + "objective/train/tokens_used": 621752800, + "objective/train/value_avg": -0.0196533203125, + "objective/train/value_loss": 0.00645302701741457, + "objective/train/value_max": -0.0023975372314453125, + "objective/train/value_min": -0.1619873046875, + "objective/train/value_reward_corr": 0.18438765070484855, + "objective/train/value_std": 0.0153045654296875, + "objective/train/weight_avg": 1.0008002519607544, + "objective/train/weighted_lm_loss": 3.9174461364746094, + "objective/train/weights_max": 1.01417875289917, + "objective/train/weights_min": 0.9062991738319397, + "theoretical_loss": 3.839043717215569, + "tokens_seen": 601292800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008260032102728732, + "loss": 2.0223, + "theoretical_loss": 3.838651160365341, + "tokens_seen": 601882624 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.013852830976247787, + "objective/train/docs_used": 352047, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.218423366546631, + "objective/train/original_loss": 4.218422889709473, + "objective/train/theoretical_loss": 3.837954495872559, + "objective/train/tokens_used": 623391200, + "objective/train/value_avg": -0.029205322265625, + "objective/train/value_loss": 0.0016073071165010333, + "objective/train/value_max": -0.0025806427001953125, + "objective/train/value_min": -0.380126953125, + "objective/train/value_reward_corr": 0.77958978388194, + "objective/train/value_std": 0.05078125, + "objective/train/weight_avg": 1.0013933181762695, + "objective/train/weighted_lm_loss": 4.224894046783447, + "objective/train/weights_max": 1.0339887142181396, + "objective/train/weights_min": 0.9548699259757996, + "theoretical_loss": 3.837954495872559, + "tokens_seen": 602931200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008256821829855538, + "loss": 1.9551, + "theoretical_loss": 3.837954495872559, + "tokens_seen": 602931200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008253611556982343, + "loss": 2.0141, + "theoretical_loss": 3.837259380491929, + "tokens_seen": 603979776 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": -0.006136131472885609, + "objective/train/docs_used": 352465, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.917938232421875, + "objective/train/original_loss": 3.917938470840454, + "objective/train/theoretical_loss": 3.8368690565519143, + "objective/train/tokens_used": 625029600, + "objective/train/value_avg": -0.03729248046875, + "objective/train/value_loss": 0.003560740267857909, + "objective/train/value_max": -0.0021152496337890625, + "objective/train/value_min": -0.5439453125, + "objective/train/value_reward_corr": 0.8787817089625624, + "objective/train/value_std": 0.0711669921875, + "objective/train/weight_avg": 0.9994040727615356, + "objective/train/weighted_lm_loss": 3.918323278427124, + "objective/train/weights_max": 1.015776515007019, + "objective/train/weights_min": 0.9555200338363647, + "theoretical_loss": 3.8368690565519143, + "tokens_seen": 604569600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008250401284109149, + "loss": 2.0304, + "theoretical_loss": 3.836565808098351, + "tokens_seen": 605028352 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008247191011235955, + "loss": 1.9997, + "theoretical_loss": 3.835873772601505, + "tokens_seen": 606076928 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.008396715857088566, + "objective/train/docs_used": 353634, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.310669422149658, + "objective/train/original_loss": 4.310669422149658, + "objective/train/theoretical_loss": 3.8357873759254693, + "objective/train/tokens_used": 626668000, + "objective/train/value_avg": -0.0171051025390625, + "objective/train/value_loss": 0.0011922348057851195, + "objective/train/value_max": -0.00206756591796875, + "objective/train/value_min": -0.09979248046875, + "objective/train/value_reward_corr": 0.16272393189233336, + "objective/train/value_std": 0.01088714599609375, + "objective/train/weight_avg": 1.0008456707000732, + "objective/train/weighted_lm_loss": 4.31482458114624, + "objective/train/weights_max": 1.0097836256027222, + "objective/train/weights_min": 0.9176437258720398, + "theoretical_loss": 3.8357873759254693, + "tokens_seen": 606208000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008243980738362761, + "loss": 2.0301, + "theoretical_loss": 3.8351832679455935, + "tokens_seen": 607125504 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.014967095106840134, + "objective/train/docs_used": 354263, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.955833673477173, + "objective/train/original_loss": 3.955833911895752, + "objective/train/theoretical_loss": 3.8347094308715017, + "objective/train/tokens_used": 628306400, + "objective/train/value_avg": -0.030303955078125, + "objective/train/value_loss": 0.0027513238601386547, + "objective/train/value_max": -0.00327301025390625, + "objective/train/value_min": -0.2086181640625, + "objective/train/value_reward_corr": 0.25062002457231053, + "objective/train/value_std": 0.0250091552734375, + "objective/train/weight_avg": 1.0015103816986084, + "objective/train/weighted_lm_loss": 3.9630239009857178, + "objective/train/weights_max": 1.0198014974594116, + "objective/train/weights_min": 0.9204447865486145, + "theoretical_loss": 3.8347094308715017, + "tokens_seen": 607846400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008240770465489566, + "loss": 2.0309, + "theoretical_loss": 3.834494288109086, + "tokens_seen": 608174080 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008237560192616372, + "loss": 2.0131, + "theoretical_loss": 3.8338068271044703, + "tokens_seen": 609222656 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.006386174354702234, + "objective/train/docs_used": 355721, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3440637588500977, + "objective/train/original_loss": 3.3440637588500977, + "objective/train/theoretical_loss": 3.833635198472356, + "objective/train/tokens_used": 629944800, + "objective/train/value_avg": -0.0189056396484375, + "objective/train/value_loss": 0.0019461896736174822, + "objective/train/value_max": -0.0024623870849609375, + "objective/train/value_min": -0.1390380859375, + "objective/train/value_reward_corr": 0.3187656655426233, + "objective/train/value_std": 0.01464080810546875, + "objective/train/weight_avg": 1.0006482601165771, + "objective/train/weighted_lm_loss": 3.3467679023742676, + "objective/train/weights_max": 1.0139013528823853, + "objective/train/weights_min": 0.9417673349380493, + "theoretical_loss": 3.833635198472356, + "tokens_seen": 609484800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008234349919743179, + "loss": 2.0061, + "theoretical_loss": 3.8331208789779954, + "tokens_seen": 610271232 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.013438696973025799, + "objective/train/docs_used": 356348, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.013378143310547, + "objective/train/original_loss": 4.013378143310547, + "objective/train/theoretical_loss": 3.8325646560121003, + "objective/train/tokens_used": 631583200, + "objective/train/value_avg": -0.02545166015625, + "objective/train/value_loss": 0.0034957733005285263, + "objective/train/value_max": -0.0024623870849609375, + "objective/train/value_min": -0.43994140625, + "objective/train/value_reward_corr": 0.5039742478461964, + "objective/train/value_std": 0.040771484375, + "objective/train/weight_avg": 1.0013611316680908, + "objective/train/weighted_lm_loss": 4.01905632019043, + "objective/train/weights_max": 1.0426896810531616, + "objective/train/weights_min": 0.9200591444969177, + "theoretical_loss": 3.8325646560121003, + "tokens_seen": 611123200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008231139646869984, + "loss": 1.9972, + "theoretical_loss": 3.83243643780943, + "tokens_seen": 611319808 + }, + { + "epoch": 0.19, + "learning_rate": 0.000822792937399679, + "loss": 2.0075, + "theoretical_loss": 3.8317534977118117, + "tokens_seen": 612368384 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.01290873158723116, + "objective/train/docs_used": 356825, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.340220928192139, + "objective/train/original_loss": 4.340221405029297, + "objective/train/theoretical_loss": 3.831497780974214, + "objective/train/tokens_used": 633221600, + "objective/train/value_avg": -0.019744873046875, + "objective/train/value_loss": 0.0013453718274831772, + "objective/train/value_max": -0.0018825531005859375, + "objective/train/value_min": -0.1607666015625, + "objective/train/value_reward_corr": 0.15383315828620145, + "objective/train/value_std": 0.01537322998046875, + "objective/train/weight_avg": 1.0012975931167603, + "objective/train/weighted_lm_loss": 4.345830917358398, + "objective/train/weights_max": 1.0151504278182983, + "objective/train/weights_min": 0.9389271140098572, + "theoretical_loss": 3.831497780974214, + "tokens_seen": 612761600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008224719101123596, + "loss": 2.0301, + "theoretical_loss": 3.8310720528312077, + "tokens_seen": 613416960 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.002368663204833865, + "objective/train/docs_used": 357491, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.169087886810303, + "objective/train/original_loss": 4.169087886810303, + "objective/train/theoretical_loss": 3.8304345510393105, + "objective/train/tokens_used": 634860000, + "objective/train/value_avg": -0.0253448486328125, + "objective/train/value_loss": 0.005148636177182198, + "objective/train/value_max": -0.00229644775390625, + "objective/train/value_min": -0.38427734375, + "objective/train/value_reward_corr": 0.7308000277912775, + "objective/train/value_std": 0.041595458984375, + "objective/train/weight_avg": 1.0002622604370117, + "objective/train/weighted_lm_loss": 4.1695685386657715, + "objective/train/weights_max": 1.029078483581543, + "objective/train/weights_min": 0.9175053834915161, + "theoretical_loss": 3.8304345510393105, + "tokens_seen": 614400000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008221508828250402, + "loss": 2.0521, + "theoretical_loss": 3.830392097346471, + "tokens_seen": 614465536 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008218298555377207, + "loss": 2.015, + "theoretical_loss": 3.8297136254690005, + "tokens_seen": 615514112 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.006822305731475353, + "objective/train/docs_used": 358741, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.213420391082764, + "objective/train/original_loss": 4.213420867919922, + "objective/train/theoretical_loss": 3.829374944082894, + "objective/train/tokens_used": 636498400, + "objective/train/value_avg": -0.0217132568359375, + "objective/train/value_loss": 0.003298286348581314, + "objective/train/value_max": -0.0021495819091796875, + "objective/train/value_min": -0.1519775390625, + "objective/train/value_reward_corr": 0.1323668390904736, + "objective/train/value_std": 0.01548004150390625, + "objective/train/weight_avg": 1.0006985664367676, + "objective/train/weighted_lm_loss": 4.216880798339844, + "objective/train/weights_max": 1.0135878324508667, + "objective/train/weights_min": 0.9380416870117188, + "theoretical_loss": 3.829374944082894, + "tokens_seen": 616038400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008215088282504013, + "loss": 2.0602, + "theoretical_loss": 3.829036631442506, + "tokens_seen": 616562688 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008211878009630819, + "loss": 2.0318, + "theoretical_loss": 3.8283611095427723, + "tokens_seen": 617611264 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": -0.001719823689199984, + "objective/train/docs_used": 359404, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.128758907318115, + "objective/train/original_loss": 4.128758907318115, + "objective/train/theoretical_loss": 3.8283189381731377, + "objective/train/tokens_used": 638136800, + "objective/train/value_avg": -0.031768798828125, + "objective/train/value_loss": 0.006694779731333256, + "objective/train/value_max": -0.00157928466796875, + "objective/train/value_min": -0.26513671875, + "objective/train/value_reward_corr": 0.6470212849537081, + "objective/train/value_std": 0.041839599609375, + "objective/train/weight_avg": 0.9998611211776733, + "objective/train/weighted_lm_loss": 4.1286821365356445, + "objective/train/weights_max": 1.0242695808410645, + "objective/train/weights_min": 0.9240601658821106, + "theoretical_loss": 3.8283189381731377, + "tokens_seen": 617676800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008208667736757624, + "loss": 2.0589, + "theoretical_loss": 3.827687054077426, + "tokens_seen": 618659840 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.005946433171629906, + "objective/train/docs_used": 360794, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7128281593322754, + "objective/train/original_loss": 3.7128281593322754, + "objective/train/theoretical_loss": 3.8272665115687077, + "objective/train/tokens_used": 639775200, + "objective/train/value_avg": -0.025360107421875, + "objective/train/value_loss": 0.005830062087625265, + "objective/train/value_max": -0.0022430419921875, + "objective/train/value_min": -0.369140625, + "objective/train/value_reward_corr": 0.18059847896473372, + "objective/train/value_std": 0.0174102783203125, + "objective/train/weight_avg": 1.000623106956482, + "objective/train/weighted_lm_loss": 3.7145137786865234, + "objective/train/weights_max": 1.0374397039413452, + "objective/train/weights_min": 0.9075607657432556, + "theoretical_loss": 3.8272665115687077, + "tokens_seen": 619315200 + }, + { + "epoch": 0.19, + "learning_rate": 0.000820545746388443, + "loss": 2.0123, + "theoretical_loss": 3.8270144593857056, + "tokens_seen": 619708416 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008202247191011236, + "loss": 1.9966, + "theoretical_loss": 3.8263433198382324, + "tokens_seen": 620756992 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.012908569537103176, + "objective/train/docs_used": 361341, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7051761150360107, + "objective/train/original_loss": 3.7051756381988525, + "objective/train/theoretical_loss": 3.8262176427166046, + "objective/train/tokens_used": 641413600, + "objective/train/value_avg": -0.02099609375, + "objective/train/value_loss": 0.0007205713191069663, + "objective/train/value_max": -0.0018672943115234375, + "objective/train/value_min": -0.26806640625, + "objective/train/value_reward_corr": 0.17752218741522924, + "objective/train/value_std": 0.0177001953125, + "objective/train/weight_avg": 1.001294493675232, + "objective/train/weighted_lm_loss": 3.711048126220703, + "objective/train/weights_max": 1.0267831087112427, + "objective/train/weights_min": 0.9891812801361084, + "theoretical_loss": 3.8262176427166046, + "tokens_seen": 620953600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008199036918138042, + "loss": 2.0626, + "theoretical_loss": 3.825673629836783, + "tokens_seen": 621805568 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.005739489104598761, + "objective/train/docs_used": 362042, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.376725196838379, + "objective/train/original_loss": 4.376725196838379, + "objective/train/theoretical_loss": 3.8251723102500437, + "objective/train/tokens_used": 643052000, + "objective/train/value_avg": -0.0253753662109375, + "objective/train/value_loss": 0.006517472676932812, + "objective/train/value_max": -0.001987457275390625, + "objective/train/value_min": -0.2255859375, + "objective/train/value_reward_corr": 0.15656971027313163, + "objective/train/value_std": 0.0174560546875, + "objective/train/weight_avg": 1.0006059408187866, + "objective/train/weighted_lm_loss": 4.379768371582031, + "objective/train/weights_max": 1.022748589515686, + "objective/train/weights_min": 0.9109374284744263, + "theoretical_loss": 3.8251723102500437, + "tokens_seen": 622592000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008195826645264847, + "loss": 2.0054, + "theoretical_loss": 3.8250053838140663, + "tokens_seen": 622854144 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008192616372391654, + "loss": 2.0155, + "theoretical_loss": 3.8243385762335, + "tokens_seen": 623902720 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.014197059907019138, + "objective/train/docs_used": 363016, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1981420516967773, + "objective/train/original_loss": 3.1981422901153564, + "objective/train/theoretical_loss": 3.824130492986359, + "objective/train/tokens_used": 644690400, + "objective/train/value_avg": -0.0271148681640625, + "objective/train/value_loss": 0.004029736388474703, + "objective/train/value_max": -0.0023784637451171875, + "objective/train/value_min": -0.251220703125, + "objective/train/value_reward_corr": 0.17686068394509172, + "objective/train/value_std": 0.017822265625, + "objective/train/weight_avg": 1.0014395713806152, + "objective/train/weighted_lm_loss": 3.2027077674865723, + "objective/train/weights_max": 1.0249199867248535, + "objective/train/weights_min": 0.9079936742782593, + "theoretical_loss": 3.824130492986359, + "tokens_seen": 624230400 + }, + { + "epoch": 0.19, + "learning_rate": 0.000818940609951846, + "loss": 2.0149, + "theoretical_loss": 3.8236732015889903, + "tokens_seen": 624951296 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.011475925333797932, + "objective/train/docs_used": 363672, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.102691650390625, + "objective/train/original_loss": 4.102691173553467, + "objective/train/theoretical_loss": 3.823092169924938, + "objective/train/tokens_used": 646328800, + "objective/train/value_avg": -0.0287628173828125, + "objective/train/value_loss": 0.001912580686621368, + "objective/train/value_max": -0.002414703369140625, + "objective/train/value_min": -0.2890625, + "objective/train/value_reward_corr": 0.7089827774410985, + "objective/train/value_std": 0.0391845703125, + "objective/train/weight_avg": 1.0011570453643799, + "objective/train/weighted_lm_loss": 4.107247352600098, + "objective/train/weights_max": 1.0245072841644287, + "objective/train/weights_min": 0.9531702399253845, + "theoretical_loss": 3.823092169924938, + "tokens_seen": 625868800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008186195826645265, + "loss": 2.0018, + "theoretical_loss": 3.8230092544047123, + "tokens_seen": 625999872 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008182985553772071, + "loss": 2.0289, + "theoretical_loss": 3.8223467292348943, + "tokens_seen": 627048448 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": -0.0058124978095293045, + "objective/train/docs_used": 364826, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8318660259246826, + "objective/train/original_loss": 3.8318662643432617, + "objective/train/theoretical_loss": 3.8220573202451886, + "objective/train/tokens_used": 647967200, + "objective/train/value_avg": -0.0247344970703125, + "objective/train/value_loss": 0.011669592000544071, + "objective/train/value_max": -0.0017824172973632812, + "objective/train/value_min": -0.324951171875, + "objective/train/value_reward_corr": 0.37675528594926, + "objective/train/value_std": 0.0284423828125, + "objective/train/weight_avg": 0.9994758367538452, + "objective/train/weighted_lm_loss": 3.8310704231262207, + "objective/train/weights_max": 1.0327742099761963, + "objective/train/weights_min": 0.9114384651184082, + "theoretical_loss": 3.8220573202451886, + "tokens_seen": 627507200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008179775280898877, + "loss": 2.0214, + "theoretical_loss": 3.8216856206636014, + "tokens_seen": 628097024 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.010966616682708263, + "objective/train/docs_used": 365375, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.019897937774658, + "objective/train/original_loss": 4.019897937774658, + "objective/train/theoretical_loss": 3.8210259233045254, + "objective/train/tokens_used": 649605600, + "objective/train/value_avg": -0.019256591796875, + "objective/train/value_loss": 0.0020588247571140528, + "objective/train/value_max": -0.0018033981323242188, + "objective/train/value_min": -0.29248046875, + "objective/train/value_reward_corr": 0.11859578639141702, + "objective/train/value_std": 0.0161895751953125, + "objective/train/weight_avg": 1.001106858253479, + "objective/train/weighted_lm_loss": 4.024221897125244, + "objective/train/weights_max": 1.02814781665802, + "objective/train/weights_min": 0.9310364723205566, + "theoretical_loss": 3.8210259233045254, + "tokens_seen": 629145600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008176565008025683, + "loss": 1.9762, + "theoretical_loss": 3.8210259233045254, + "tokens_seen": 629145600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008173354735152488, + "loss": 2.0012, + "theoretical_loss": 3.8203676318007704, + "tokens_seen": 630194176 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.01202993094921112, + "objective/train/docs_used": 366612, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9368808269500732, + "objective/train/original_loss": 3.936880350112915, + "objective/train/theoretical_loss": 3.8199979586363946, + "objective/train/tokens_used": 651244000, + "objective/train/value_avg": -0.0208282470703125, + "objective/train/value_loss": 0.001967785181477666, + "objective/train/value_max": -0.0015850067138671875, + "objective/train/value_min": -0.280517578125, + "objective/train/value_reward_corr": 0.1876747148721428, + "objective/train/value_std": 0.0195159912109375, + "objective/train/weight_avg": 1.0012125968933105, + "objective/train/weighted_lm_loss": 3.9414987564086914, + "objective/train/weights_max": 1.0277023315429688, + "objective/train/weights_min": 0.9347817897796631, + "theoretical_loss": 3.8199979586363946, + "tokens_seen": 630784000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008170144462279294, + "loss": 2.0109, + "theoretical_loss": 3.819710740824646, + "tokens_seen": 631242752 + }, + { + "epoch": 0.19, + "learning_rate": 0.00081669341894061, + "loss": 1.9739, + "theoretical_loss": 3.8190552450774584, + "tokens_seen": 632291328 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.009772824123501778, + "objective/train/docs_used": 367323, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.619089603424072, + "objective/train/original_loss": 4.6190900802612305, + "objective/train/theoretical_loss": 3.8189734059483165, + "objective/train/tokens_used": 652882400, + "objective/train/value_avg": -0.0214080810546875, + "objective/train/value_loss": 0.006994395516812801, + "objective/train/value_max": -0.0021820068359375, + "objective/train/value_min": -0.32177734375, + "objective/train/value_reward_corr": 0.2474740131128931, + "objective/train/value_std": 0.01898193359375, + "objective/train/weight_avg": 1.0010113716125488, + "objective/train/weighted_lm_loss": 4.623510837554932, + "objective/train/weights_max": 1.032599687576294, + "objective/train/weights_min": 0.9077461957931519, + "theoretical_loss": 3.8189734059483165, + "tokens_seen": 632422400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008163723916532906, + "loss": 1.9954, + "theoretical_loss": 3.818401139289306, + "tokens_seen": 633339904 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.010038930922746658, + "objective/train/docs_used": 367883, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9464006423950195, + "objective/train/original_loss": 3.9463999271392822, + "objective/train/theoretical_loss": 3.817952245119961, + "objective/train/tokens_used": 654520800, + "objective/train/value_avg": -0.017974853515625, + "objective/train/value_loss": 0.002969532273709774, + "objective/train/value_max": -0.0016546249389648438, + "objective/train/value_min": -0.41650390625, + "objective/train/value_reward_corr": 0.09737633733778507, + "objective/train/value_std": 0.0186767578125, + "objective/train/weight_avg": 1.0010184049606323, + "objective/train/weighted_lm_loss": 3.9500861167907715, + "objective/train/weights_max": 1.042465329170227, + "objective/train/weights_min": 0.9070237278938293, + "theoretical_loss": 3.817952245119961, + "tokens_seen": 634060800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008160513643659711, + "loss": 1.9965, + "theoretical_loss": 3.8177484182188737, + "tokens_seen": 634388480 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008157303370786517, + "loss": 1.9868, + "theoretical_loss": 3.8170970766532326, + "tokens_seen": 635437056 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.011239549145102501, + "objective/train/docs_used": 369288, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.329853057861328, + "objective/train/original_loss": 4.329853534698486, + "objective/train/theoretical_loss": 3.816934456201243, + "objective/train/tokens_used": 656159200, + "objective/train/value_avg": -0.0186767578125, + "objective/train/value_loss": 0.0016396689461544156, + "objective/train/value_max": -0.0020904541015625, + "objective/train/value_min": -0.43212890625, + "objective/train/value_reward_corr": 0.16714385518143662, + "objective/train/value_std": 0.01502227783203125, + "objective/train/weight_avg": 1.0011321306228638, + "objective/train/weighted_lm_loss": 4.334932804107666, + "objective/train/weights_max": 1.0434328317642212, + "objective/train/weights_min": 0.918943464756012, + "theoretical_loss": 3.816934456201243, + "tokens_seen": 635699200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008154093097913324, + "loss": 1.9933, + "theoretical_loss": 3.816447109407641, + "tokens_seen": 636485632 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": -0.015291519463062286, + "objective/train/docs_used": 369983, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8676187992095947, + "objective/train/original_loss": 3.8676187992095947, + "objective/train/theoretical_loss": 3.815920019410453, + "objective/train/tokens_used": 657797600, + "objective/train/value_avg": -0.033660888671875, + "objective/train/value_loss": 0.01720603182911873, + "objective/train/value_max": -0.0020427703857421875, + "objective/train/value_min": -0.376708984375, + "objective/train/value_reward_corr": 0.4946933318899237, + "objective/train/value_std": 0.049896240234375, + "objective/train/weight_avg": 0.9985548853874207, + "objective/train/weighted_lm_loss": 3.860685110092163, + "objective/train/weights_max": 1.030781626701355, + "objective/train/weights_min": 0.9084475040435791, + "theoretical_loss": 3.815920019410453, + "tokens_seen": 637337600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008150882825040129, + "loss": 1.9811, + "theoretical_loss": 3.815798511325341, + "tokens_seen": 637534208 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008147672552166935, + "loss": 1.9644, + "theoretical_loss": 3.8151512772773675, + "tokens_seen": 638582784 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.013817701488733292, + "objective/train/docs_used": 370882, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9968442916870117, + "objective/train/original_loss": 3.996844530105591, + "objective/train/theoretical_loss": 3.8149089151324036, + "objective/train/tokens_used": 659436000, + "objective/train/value_avg": -0.015777587890625, + "objective/train/value_loss": 0.0003502371546346694, + "objective/train/value_max": -0.0017547607421875, + "objective/train/value_min": -0.191650390625, + "objective/train/value_reward_corr": 0.16701842428673982, + "objective/train/value_std": 0.01251220703125, + "objective/train/weight_avg": 1.0013834238052368, + "objective/train/weighted_lm_loss": 4.0022053718566895, + "objective/train/weights_max": 1.0192853212356567, + "objective/train/weights_min": 0.9954832792282104, + "theoretical_loss": 3.8149089151324036, + "tokens_seen": 638976000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008144462279293741, + "loss": 1.9935, + "theoretical_loss": 3.814505402162349, + "tokens_seen": 639631360 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0018071408849209547, + "objective/train/docs_used": 371543, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9744062423706055, + "objective/train/original_loss": 3.9744064807891846, + "objective/train/theoretical_loss": 3.8139011239166054, + "objective/train/tokens_used": 661074400, + "objective/train/value_avg": -0.02178955078125, + "objective/train/value_loss": 0.008708304725587368, + "objective/train/value_max": -0.00250244140625, + "objective/train/value_min": -0.2257080078125, + "objective/train/value_reward_corr": 0.14624306564034262, + "objective/train/value_std": 0.015533447265625, + "objective/train/weight_avg": 1.0002232789993286, + "objective/train/weighted_lm_loss": 3.974822998046875, + "objective/train/weights_max": 1.0209152698516846, + "objective/train/weights_min": 0.9060660600662231, + "theoretical_loss": 3.8139011239166054, + "tokens_seen": 640614400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008141252006420547, + "loss": 1.9761, + "theoretical_loss": 3.813860880906316, + "tokens_seen": 640679936 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008138041733547352, + "loss": 1.9837, + "theoretical_loss": 3.813217708462508, + "tokens_seen": 641728512 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.009180986322462559, + "objective/train/docs_used": 373013, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8182313442230225, + "objective/train/original_loss": 3.8182313442230225, + "objective/train/theoretical_loss": 3.81289662647547, + "objective/train/tokens_used": 662712800, + "objective/train/value_avg": -0.021759033203125, + "objective/train/value_loss": 0.0013332816306501627, + "objective/train/value_max": -0.0022335052490234375, + "objective/train/value_min": -0.301513671875, + "objective/train/value_reward_corr": 0.39299668404301286, + "objective/train/value_std": 0.0270233154296875, + "objective/train/weight_avg": 1.000924825668335, + "objective/train/weighted_lm_loss": 3.8231024742126465, + "objective/train/weights_max": 1.0299057960510254, + "objective/train/weights_min": 0.9297232031822205, + "theoretical_loss": 3.81289662647547, + "tokens_seen": 642252800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008134831460674158, + "loss": 1.9925, + "theoretical_loss": 3.8125758798111864, + "tokens_seen": 642777088 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008131621187800963, + "loss": 1.996, + "theoretical_loss": 3.8119353899594413, + "tokens_seen": 643825664 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.00512647582218051, + "objective/train/docs_used": 373533, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.253428936004639, + "objective/train/original_loss": 4.253429412841797, + "objective/train/theoretical_loss": 3.811895403682529, + "objective/train/tokens_used": 664351200, + "objective/train/value_avg": -0.016448974609375, + "objective/train/value_loss": 0.0027396120131015778, + "objective/train/value_max": -0.001956939697265625, + "objective/train/value_min": -0.2327880859375, + "objective/train/value_reward_corr": 0.31810427824561244, + "objective/train/value_std": 0.011962890625, + "objective/train/weight_avg": 1.0005261898040771, + "objective/train/weighted_lm_loss": 4.254344463348389, + "objective/train/weights_max": 1.0233981609344482, + "objective/train/weights_min": 0.9424889087677002, + "theoretical_loss": 3.811895403682529, + "tokens_seen": 643891200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008128410914927768, + "loss": 2.0042, + "theoretical_loss": 3.8112962339410092, + "tokens_seen": 644874240 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.0036167223006486893, + "objective/train/docs_used": 374553, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6478099822998047, + "objective/train/original_loss": 3.6478097438812256, + "objective/train/theoretical_loss": 3.8108974365706887, + "objective/train/tokens_used": 665989600, + "objective/train/value_avg": -0.03253173828125, + "objective/train/value_loss": 0.012759230099618435, + "objective/train/value_max": -0.001934051513671875, + "objective/train/value_min": -0.60595703125, + "objective/train/value_reward_corr": 0.3114496814027316, + "objective/train/value_std": 0.049957275390625, + "objective/train/weight_avg": 1.0004242658615112, + "objective/train/weighted_lm_loss": 3.6478655338287354, + "objective/train/weights_max": 1.0525859594345093, + "objective/train/weights_min": 0.9090859293937683, + "theoretical_loss": 3.8108974365706887, + "tokens_seen": 645529600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008125200642054574, + "loss": 2.0011, + "theoretical_loss": 3.810658406816085, + "tokens_seen": 645922816 + }, + { + "epoch": 0.2, + "learning_rate": 0.000812199036918138, + "loss": 1.9866, + "theoretical_loss": 3.8100219036711396, + "tokens_seen": 646971392 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.006774307228624821, + "objective/train/docs_used": 375146, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.344118118286133, + "objective/train/original_loss": 4.344118118286133, + "objective/train/theoretical_loss": 3.8099027063304964, + "objective/train/tokens_used": 667628000, + "objective/train/value_avg": -0.02435302734375, + "objective/train/value_loss": 0.010541343130171299, + "objective/train/value_max": -0.0023975372314453125, + "objective/train/value_min": -0.202392578125, + "objective/train/value_reward_corr": 0.2772581026327615, + "objective/train/value_std": 0.0191497802734375, + "objective/train/weight_avg": 0.999374270439148, + "objective/train/weighted_lm_loss": 4.3424835205078125, + "objective/train/weights_max": 1.0199602842330933, + "objective/train/weights_min": 0.910927414894104, + "theoretical_loss": 3.8099027063304964, + "tokens_seen": 647168000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008118780096308186, + "loss": 1.9909, + "theoretical_loss": 3.809386719618737, + "tokens_seen": 648019968 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.009548051282763481, + "objective/train/docs_used": 375860, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.125392436981201, + "objective/train/original_loss": 4.125391960144043, + "objective/train/theoretical_loss": 3.808911194308436, + "objective/train/tokens_used": 669266400, + "objective/train/value_avg": -0.020965576171875, + "objective/train/value_loss": 0.0022521200589835644, + "objective/train/value_max": -0.0024051666259765625, + "objective/train/value_min": -0.334228515625, + "objective/train/value_reward_corr": 0.046250770011350986, + "objective/train/value_std": 0.0229339599609375, + "objective/train/weight_avg": 1.0009660720825195, + "objective/train/weighted_lm_loss": 4.1304097175598145, + "objective/train/weights_max": 1.033889889717102, + "objective/train/weights_min": 0.9559965133666992, + "theoretical_loss": 3.808911194308436, + "tokens_seen": 648806400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008115569823434991, + "loss": 1.9959, + "theoretical_loss": 3.808752849797353, + "tokens_seen": 649068544 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008112359550561798, + "loss": 1.9992, + "theoretical_loss": 3.8081202893712005, + "tokens_seen": 650117120 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.005084901116788387, + "objective/train/docs_used": 377374, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.141280651092529, + "objective/train/original_loss": 4.141280651092529, + "objective/train/theoretical_loss": 3.807922882005249, + "objective/train/tokens_used": 670904800, + "objective/train/value_avg": -0.02130126953125, + "objective/train/value_loss": 0.006299665663391352, + "objective/train/value_max": -0.001949310302734375, + "objective/train/value_min": -0.1754150390625, + "objective/train/value_reward_corr": 0.26871827391953346, + "objective/train/value_std": 0.0198822021484375, + "objective/train/weight_avg": 1.0005393028259277, + "objective/train/weighted_lm_loss": 4.142897129058838, + "objective/train/weights_max": 1.0155280828475952, + "objective/train/weights_min": 0.9200943112373352, + "theoretical_loss": 3.807922882005249, + "tokens_seen": 650444800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008109149277688604, + "loss": 1.9975, + "theoretical_loss": 3.807489033530046, + "tokens_seen": 651165696 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.008288674056529999, + "objective/train/docs_used": 377873, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.96990704536438, + "objective/train/original_loss": 3.969906806945801, + "objective/train/theoretical_loss": 3.806937751074268, + "objective/train/tokens_used": 672543200, + "objective/train/value_avg": -0.0174713134765625, + "objective/train/value_loss": 0.0020898638758808374, + "objective/train/value_max": -0.0017271041870117188, + "objective/train/value_min": -0.171630859375, + "objective/train/value_reward_corr": 0.17158596910943566, + "objective/train/value_std": 0.01427459716796875, + "objective/train/weight_avg": 1.0008392333984375, + "objective/train/weighted_lm_loss": 3.9726288318634033, + "objective/train/weights_max": 1.014709234237671, + "objective/train/weights_min": 0.9233612418174744, + "theoretical_loss": 3.806937751074268, + "tokens_seen": 652083200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008105939004815409, + "loss": 1.9976, + "theoretical_loss": 3.806859077489038, + "tokens_seen": 652214272 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008102728731942215, + "loss": 1.982, + "theoretical_loss": 3.806230416488531, + "tokens_seen": 653262848 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.008926665410399437, + "objective/train/docs_used": 379015, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.924867868423462, + "objective/train/original_loss": 3.9248673915863037, + "objective/train/theoretical_loss": 3.805955783319785, + "objective/train/tokens_used": 674181600, + "objective/train/value_avg": -0.0196685791015625, + "objective/train/value_loss": 0.0023723614867776632, + "objective/train/value_max": -0.001987457275390625, + "objective/train/value_min": -0.227783203125, + "objective/train/value_reward_corr": 0.13614677076482964, + "objective/train/value_std": 0.01453399658203125, + "objective/train/weight_avg": 1.0009043216705322, + "objective/train/weighted_lm_loss": 3.9278454780578613, + "objective/train/weights_max": 1.02138090133667, + "objective/train/weights_min": 0.931845486164093, + "theoretical_loss": 3.805955783319785, + "tokens_seen": 653721600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008099518459069021, + "loss": 1.9578, + "theoretical_loss": 3.8056030457939114, + "tokens_seen": 654311424 + }, + { + "debugging/Self-BLEU-5": 0.36561994328618, + "debugging/distinct-1-grams": 0.7722597331948876, + "debugging/distinct-2-grams": 0.8492548173712738, + "debugging/entropy-1-grams": 5.826808035476899, + "debugging/entropy-2-grams": 6.590880961095115, + "debugging/length": 495.27272727272725, + "debugging/num_segments": 11, + "debugging/raw_token_scores_avg": 0.027885353192687035, + "debugging/raw_token_scores_std": 0.11994407325983047, + "epoch": 0.2, + "objective/train/advantage_avg": -0.0036334700416773558, + "objective/train/docs_used": 379813, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7338175773620605, + "objective/train/original_loss": 3.7338173389434814, + "objective/train/theoretical_loss": 3.804976960695429, + "objective/train/tokens_used": 675820000, + "objective/train/value_avg": -0.0242156982421875, + "objective/train/value_loss": 0.012442988343536854, + "objective/train/value_max": -0.00206756591796875, + "objective/train/value_min": -0.315185546875, + "objective/train/value_reward_corr": 0.39252457536958063, + "objective/train/value_std": 0.0300140380859375, + "objective/train/weight_avg": 0.9996975064277649, + "objective/train/weighted_lm_loss": 3.73140811920166, + "objective/train/weights_max": 1.0319349765777588, + "objective/train/weights_min": 0.9083859920501709, + "theoretical_loss": 3.804976960695429, + "tokens_seen": 655360000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008096308186195827, + "loss": 1.9171, + "theoretical_loss": 3.804976960695429, + "tokens_seen": 655360000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008093097913322632, + "loss": 1.9435, + "theoretical_loss": 3.8043521565080236, + "tokens_seen": 656408576 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.011858230456709862, + "objective/train/docs_used": 381022, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9333927631378174, + "objective/train/original_loss": 3.9333930015563965, + "objective/train/theoretical_loss": 3.804001265302574, + "objective/train/tokens_used": 677458400, + "objective/train/value_avg": -0.01806640625, + "objective/train/value_loss": 0.0017703947378322482, + "objective/train/value_max": -0.0023593902587890625, + "objective/train/value_min": -0.297607421875, + "objective/train/value_reward_corr": 0.21208480585444445, + "objective/train/value_std": 0.01312255859375, + "objective/train/weight_avg": 1.0011945962905884, + "objective/train/weighted_lm_loss": 3.9384007453918457, + "objective/train/weights_max": 1.030119776725769, + "objective/train/weights_min": 0.9158653020858765, + "theoretical_loss": 3.804001265302574, + "tokens_seen": 656998400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008089887640449438, + "loss": 1.9875, + "theoretical_loss": 3.803728628571159, + "tokens_seen": 657457152 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008086677367576244, + "loss": 1.9473, + "theoretical_loss": 3.803106372248654, + "tokens_seen": 658505728 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.006944401655346155, + "objective/train/docs_used": 381585, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.197359561920166, + "objective/train/original_loss": 4.197360515594482, + "objective/train/theoretical_loss": 3.8030286793887647, + "objective/train/tokens_used": 679096800, + "objective/train/value_avg": -0.0175628662109375, + "objective/train/value_loss": 0.0012482975143939257, + "objective/train/value_max": -0.002010345458984375, + "objective/train/value_min": -0.218994140625, + "objective/train/value_reward_corr": 0.2120871970639943, + "objective/train/value_std": 0.0131683349609375, + "objective/train/weight_avg": 1.0007007122039795, + "objective/train/weighted_lm_loss": 4.201510906219482, + "objective/train/weights_max": 1.0220465660095215, + "objective/train/weights_min": 0.9510896801948547, + "theoretical_loss": 3.8030286793887647, + "tokens_seen": 658636800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008083467094703049, + "loss": 1.9253, + "theoretical_loss": 3.8024853829285172, + "tokens_seen": 659554304 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.00368120102211833, + "objective/train/docs_used": 382875, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5734245777130127, + "objective/train/original_loss": 3.5734241008758545, + "objective/train/theoretical_loss": 3.8020591853461596, + "objective/train/tokens_used": 680735200, + "objective/train/value_avg": -0.0222625732421875, + "objective/train/value_loss": 0.007699277717620134, + "objective/train/value_max": -0.0021495819091796875, + "objective/train/value_min": -0.314453125, + "objective/train/value_reward_corr": 0.1922836754266659, + "objective/train/value_std": 0.0222930908203125, + "objective/train/weight_avg": 1.0004057884216309, + "objective/train/weighted_lm_loss": 3.575913906097412, + "objective/train/weights_max": 1.0302982330322266, + "objective/train/weights_min": 0.9129483103752136, + "theoretical_loss": 3.8020591853461596, + "tokens_seen": 660275200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008080256821829855, + "loss": 1.9498, + "theoretical_loss": 3.801865656022783, + "tokens_seen": 660602880 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008077046548956661, + "loss": 1.9153, + "theoretical_loss": 3.801247186967348, + "tokens_seen": 661651456 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.009574326686561108, + "objective/train/docs_used": 383560, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.1845221519470215, + "objective/train/original_loss": 4.1845221519470215, + "objective/train/theoretical_loss": 3.8010927657100013, + "objective/train/tokens_used": 682373600, + "objective/train/value_avg": -0.01934814453125, + "objective/train/value_loss": 0.00471398513764143, + "objective/train/value_max": -0.0020275115966796875, + "objective/train/value_min": -0.37841796875, + "objective/train/value_reward_corr": 0.16889390485517655, + "objective/train/value_std": 0.0187835693359375, + "objective/train/weight_avg": 1.0009804964065552, + "objective/train/weighted_lm_loss": 4.188657760620117, + "objective/train/weights_max": 1.0385018587112427, + "objective/train/weights_min": 0.9073129296302795, + "theoretical_loss": 3.8010927657100013, + "tokens_seen": 661913600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008073836276083468, + "loss": 1.9282, + "theoretical_loss": 3.8006299712218086, + "tokens_seen": 662700032 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.001985508017241955, + "objective/train/docs_used": 384103, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.402064561843872, + "objective/train/original_loss": 3.402064323425293, + "objective/train/theoretical_loss": 3.8001294031571007, + "objective/train/tokens_used": 684012000, + "objective/train/value_avg": -0.01959228515625, + "objective/train/value_loss": 0.007753743790090084, + "objective/train/value_max": -0.0021152496337890625, + "objective/train/value_min": -0.2432861328125, + "objective/train/value_reward_corr": 0.16410675157960813, + "objective/train/value_std": 0.0201568603515625, + "objective/train/weight_avg": 1.0002365112304688, + "objective/train/weighted_lm_loss": 3.4019503593444824, + "objective/train/weights_max": 1.0235891342163086, + "objective/train/weights_min": 0.9116743206977844, + "theoretical_loss": 3.8001294031571007, + "tokens_seen": 663552000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008070626003210273, + "loss": 1.927, + "theoretical_loss": 3.8000140042693022, + "tokens_seen": 663748608 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008067415730337079, + "loss": 1.9076, + "theoretical_loss": 3.799399281616348, + "tokens_seen": 664797184 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.010632694698870182, + "objective/train/docs_used": 385247, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.211084842681885, + "objective/train/original_loss": 4.211085319519043, + "objective/train/theoretical_loss": 3.7991690805043445, + "objective/train/tokens_used": 685650400, + "objective/train/value_avg": -0.0200653076171875, + "objective/train/value_loss": 0.0023715656716376543, + "objective/train/value_max": -0.001941680908203125, + "objective/train/value_min": -0.322998046875, + "objective/train/value_reward_corr": 0.123924272827351, + "objective/train/value_std": 0.01491546630859375, + "objective/train/weight_avg": 1.001075029373169, + "objective/train/weighted_lm_loss": 4.215487957000732, + "objective/train/weights_max": 1.0327659845352173, + "objective/train/weights_min": 0.9302045702934265, + "theoretical_loss": 3.7991690805043445, + "tokens_seen": 665190400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008064205457463885, + "loss": 1.9377, + "theoretical_loss": 3.798785798792688, + "tokens_seen": 665845760 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.005288764834403992, + "objective/train/docs_used": 385855, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5338265895843506, + "objective/train/original_loss": 3.5338261127471924, + "objective/train/theoretical_loss": 3.79821178070722, + "objective/train/tokens_used": 687288800, + "objective/train/value_avg": -0.024688720703125, + "objective/train/value_loss": 0.006504475604742765, + "objective/train/value_max": -0.0016813278198242188, + "objective/train/value_min": -0.42431640625, + "objective/train/value_reward_corr": 0.4234902589587648, + "objective/train/value_std": 0.027801513671875, + "objective/train/weight_avg": 1.000560998916626, + "objective/train/weighted_lm_loss": 3.536867380142212, + "objective/train/weights_max": 1.0432337522506714, + "objective/train/weights_min": 0.9183399081230164, + "theoretical_loss": 3.79821178070722, + "tokens_seen": 666828800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008060995184590691, + "loss": 1.8697, + "theoretical_loss": 3.798173551351132, + "tokens_seen": 666894336 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008057784911717496, + "loss": 1.9112, + "theoretical_loss": 3.797562534867401, + "tokens_seen": 667942912 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.01586802862584591, + "objective/train/docs_used": 387212, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.180985450744629, + "objective/train/original_loss": 4.180985450744629, + "objective/train/theoretical_loss": 3.797257486858361, + "objective/train/tokens_used": 688927200, + "objective/train/value_avg": -0.025054931640625, + "objective/train/value_loss": 0.023847682401537895, + "objective/train/value_max": -0.002269744873046875, + "objective/train/value_min": -0.364990234375, + "objective/train/value_reward_corr": 0.392811116179898, + "objective/train/value_std": 0.0284576416015625, + "objective/train/weight_avg": 0.9985294342041016, + "objective/train/weighted_lm_loss": 4.174073219299316, + "objective/train/weights_max": 1.0316616296768188, + "objective/train/weights_min": 0.9078937768936157, + "theoretical_loss": 3.797257486858361, + "tokens_seen": 668467200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008054574638844302, + "loss": 1.8787, + "theoretical_loss": 3.796952744939976, + "tokens_seen": 668991488 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008051364365971108, + "loss": 1.9029, + "theoretical_loss": 3.7963441771899418, + "tokens_seen": 670040064 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.010203755460679531, + "objective/train/docs_used": 387914, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.956373929977417, + "objective/train/original_loss": 3.956374168395996, + "objective/train/theoretical_loss": 3.7963061821861084, + "objective/train/tokens_used": 690565600, + "objective/train/value_avg": -0.03277587890625, + "objective/train/value_loss": 0.007486241403967142, + "objective/train/value_max": -0.0020999908447265625, + "objective/train/value_min": -0.38818359375, + "objective/train/value_reward_corr": 0.543509382412576, + "objective/train/value_std": 0.049530029296875, + "objective/train/weight_avg": 1.0010571479797363, + "objective/train/weighted_lm_loss": 3.961223602294922, + "objective/train/weights_max": 1.029827356338501, + "objective/train/weights_min": 0.9089386463165283, + "theoretical_loss": 3.7963061821861084, + "tokens_seen": 670105600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008048154093097913, + "loss": 1.8903, + "theoretical_loss": 3.795736827260839, + "tokens_seen": 671088640 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -7.689838093938306e-05, + "objective/train/docs_used": 389368, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.064077854156494, + "objective/train/original_loss": 3.064077854156494, + "objective/train/theoretical_loss": 3.795357850053097, + "objective/train/tokens_used": 692204000, + "objective/train/value_avg": -0.0224151611328125, + "objective/train/value_loss": 0.004497057292610407, + "objective/train/value_max": -0.0018310546875, + "objective/train/value_min": -0.29736328125, + "objective/train/value_reward_corr": 0.264821951515415, + "objective/train/value_std": 0.0188140869140625, + "objective/train/weight_avg": 1.0000144243240356, + "objective/train/weighted_lm_loss": 3.064558982849121, + "objective/train/weights_max": 1.02357816696167, + "objective/train/weights_min": 0.9105502367019653, + "theoretical_loss": 3.795357850053097, + "tokens_seen": 671744000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008044943820224719, + "loss": 1.902, + "theoretical_loss": 3.795130690818514, + "tokens_seen": 672137216 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008041733547351525, + "loss": 1.8935, + "theoretical_loss": 3.7945257635509657, + "tokens_seen": 673185792 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.0016663267742842436, + "objective/train/docs_used": 390091, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5161843299865723, + "objective/train/original_loss": 3.5161845684051514, + "objective/train/theoretical_loss": 3.7944124739548526, + "objective/train/tokens_used": 693842400, + "objective/train/value_avg": -0.0176544189453125, + "objective/train/value_loss": 0.002023472683504224, + "objective/train/value_max": -0.0016813278198242188, + "objective/train/value_min": -0.1634521484375, + "objective/train/value_reward_corr": 0.8042283869046113, + "objective/train/value_std": 0.020538330078125, + "objective/train/weight_avg": 0.999843418598175, + "objective/train/weighted_lm_loss": 3.515664577484131, + "objective/train/weights_max": 1.0112236738204956, + "objective/train/weights_min": 0.9427839517593384, + "theoretical_loss": 3.7944124739548526, + "tokens_seen": 673382400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008038523274478331, + "loss": 1.9059, + "theoretical_loss": 3.793922041168204, + "tokens_seen": 674234368 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.012772741727530956, + "objective/train/docs_used": 391518, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7114710807800293, + "objective/train/original_loss": 3.7114713191986084, + "objective/train/theoretical_loss": 3.79347003751841, + "objective/train/tokens_used": 695480800, + "objective/train/value_avg": -0.041351318359375, + "objective/train/value_loss": 0.008115613833069801, + "objective/train/value_max": -0.0028667449951171875, + "objective/train/value_min": -0.58349609375, + "objective/train/value_reward_corr": 0.4208603315689237, + "objective/train/value_std": 0.06640625, + "objective/train/weight_avg": 1.0013171434402466, + "objective/train/weighted_lm_loss": 3.717620849609375, + "objective/train/weights_max": 1.0434291362762451, + "objective/train/weights_min": 0.910676896572113, + "theoretical_loss": 3.79347003751841, + "tokens_seen": 675020800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008035313001605136, + "loss": 1.8981, + "theoretical_loss": 3.7933195194020994, + "tokens_seen": 675282944 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008032102728731943, + "loss": 1.9241, + "theoretical_loss": 3.7927181940062407, + "tokens_seen": 676331520 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.011005531996488571, + "objective/train/docs_used": 392138, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.04875373840332, + "objective/train/original_loss": 4.04875373840332, + "objective/train/theoretical_loss": 3.7925305245009504, + "objective/train/tokens_used": 697119200, + "objective/train/value_avg": -0.0173187255859375, + "objective/train/value_loss": 0.00208362122066319, + "objective/train/value_max": -0.001987457275390625, + "objective/train/value_min": -0.287109375, + "objective/train/value_reward_corr": 0.34846735466346435, + "objective/train/value_std": 0.01418304443359375, + "objective/train/weight_avg": 1.0011107921600342, + "objective/train/weighted_lm_loss": 4.053353786468506, + "objective/train/weights_max": 1.0188276767730713, + "objective/train/weights_min": 0.909957766532898, + "theoretical_loss": 3.7925305245009504, + "tokens_seen": 676659200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008028892455858749, + "loss": 1.9403, + "theoretical_loss": 3.792118060755787, + "tokens_seen": 677380096 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.00012068181240465492, + "objective/train/docs_used": 393619, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.639446973800659, + "objective/train/original_loss": 3.63944673538208, + "objective/train/theoretical_loss": 3.7915939187884558, + "objective/train/tokens_used": 698757600, + "objective/train/value_avg": -0.0282745361328125, + "objective/train/value_loss": 0.005897785071283579, + "objective/train/value_max": -0.0022602081298828125, + "objective/train/value_min": -0.300048828125, + "objective/train/value_reward_corr": 0.5075030146437022, + "objective/train/value_std": 0.036712646484375, + "objective/train/weight_avg": 1.000041127204895, + "objective/train/weighted_lm_loss": 3.639998197555542, + "objective/train/weights_max": 1.0299196243286133, + "objective/train/weights_min": 0.9180324077606201, + "theoretical_loss": 3.7915939187884558, + "tokens_seen": 678297600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008025682182985554, + "loss": 1.9162, + "theoretical_loss": 3.7915191154473287, + "tokens_seen": 678428672 + }, + { + "epoch": 0.21, + "learning_rate": 0.000802247191011236, + "loss": 1.8942, + "theoretical_loss": 3.790921353898745, + "tokens_seen": 679477248 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.01826149970293045, + "objective/train/docs_used": 393966, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2219855785369873, + "objective/train/original_loss": 3.2219855785369873, + "objective/train/theoretical_loss": 3.790660204394375, + "objective/train/tokens_used": 700396000, + "objective/train/value_avg": -0.023406982421875, + "objective/train/value_loss": 0.0013509956188499928, + "objective/train/value_max": -0.00206756591796875, + "objective/train/value_min": -0.369140625, + "objective/train/value_reward_corr": 0.2502050577885325, + "objective/train/value_std": 0.023712158203125, + "objective/train/weight_avg": 1.0018328428268433, + "objective/train/weighted_lm_loss": 3.2272958755493164, + "objective/train/weights_max": 1.0374990701675415, + "objective/train/weights_min": 0.9293122291564941, + "theoretical_loss": 3.790660204394375, + "tokens_seen": 679936000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008019261637239166, + "loss": 1.8871, + "theoretical_loss": 3.790324771949063, + "tokens_seen": 680525824 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.0018307630671188235, + "objective/train/docs_used": 395160, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5174856185913086, + "objective/train/original_loss": 3.517486095428467, + "objective/train/theoretical_loss": 3.7897293654583164, + "objective/train/tokens_used": 702034400, + "objective/train/value_avg": -0.0234832763671875, + "objective/train/value_loss": 0.01244338694959879, + "objective/train/value_max": -0.0016679763793945312, + "objective/train/value_min": -0.339599609375, + "objective/train/value_reward_corr": 0.3774388493289113, + "objective/train/value_std": 0.0311279296875, + "objective/train/weight_avg": 0.9998776912689209, + "objective/train/weighted_lm_loss": 3.516608715057373, + "objective/train/weights_max": 1.0264463424682617, + "objective/train/weights_min": 0.90794837474823, + "theoretical_loss": 3.7897293654583164, + "tokens_seen": 681574400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008016051364365972, + "loss": 1.8593, + "theoretical_loss": 3.7897293654583164, + "tokens_seen": 681574400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008012841091492777, + "loss": 1.9099, + "theoretical_loss": 3.7891351303074123, + "tokens_seen": 682622976 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.009675837121903896, + "objective/train/docs_used": 395993, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.154585838317871, + "objective/train/original_loss": 4.154586315155029, + "objective/train/theoretical_loss": 3.788801386244749, + "objective/train/tokens_used": 703672800, + "objective/train/value_avg": -0.0239105224609375, + "objective/train/value_loss": 0.0024257912300527096, + "objective/train/value_max": -0.001964569091796875, + "objective/train/value_min": -0.2398681640625, + "objective/train/value_reward_corr": 0.47416437675328105, + "objective/train/value_std": 0.0262603759765625, + "objective/train/weight_avg": 1.0009796619415283, + "objective/train/weighted_lm_loss": 4.158566474914551, + "objective/train/weights_max": 1.0241405963897705, + "objective/train/weights_min": 0.9086169600486755, + "theoretical_loss": 3.788801386244749, + "tokens_seen": 683212800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008009630818619583, + "loss": 1.8941, + "theoretical_loss": 3.7885420623979886, + "tokens_seen": 683671552 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008006420545746389, + "loss": 1.8687, + "theoretical_loss": 3.787950157652282, + "tokens_seen": 684720128 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.010540186427533627, + "objective/train/docs_used": 397274, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.122130393981934, + "objective/train/original_loss": 4.122130393981934, + "objective/train/theoretical_loss": 3.7878762511417223, + "objective/train/tokens_used": 705311200, + "objective/train/value_avg": -0.017333984375, + "objective/train/value_loss": 0.0016513754380866885, + "objective/train/value_max": -0.0017337799072265625, + "objective/train/value_min": -0.226806640625, + "objective/train/value_reward_corr": 0.20182639074396347, + "objective/train/value_std": 0.01739501953125, + "objective/train/weight_avg": 1.0010621547698975, + "objective/train/weighted_lm_loss": 4.126324653625488, + "objective/train/weights_max": 1.0228407382965088, + "objective/train/weights_min": 0.9148741364479065, + "theoretical_loss": 3.7878762511417223, + "tokens_seen": 684851200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008003210272873194, + "loss": 1.8818, + "theoretical_loss": 3.78735941201299, + "tokens_seen": 685768704 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.002405761741101742, + "objective/train/docs_used": 397954, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.077212333679199, + "objective/train/original_loss": 4.077211380004883, + "objective/train/theoretical_loss": 3.786953944659605, + "objective/train/tokens_used": 706949600, + "objective/train/value_avg": -0.022369384765625, + "objective/train/value_loss": 0.007764915935695171, + "objective/train/value_max": -0.0018825531005859375, + "objective/train/value_min": -0.239501953125, + "objective/train/value_reward_corr": 0.287367944127196, + "objective/train/value_std": 0.021331787109375, + "objective/train/weight_avg": 0.9997974634170532, + "objective/train/weighted_lm_loss": 4.07654333114624, + "objective/train/weights_max": 1.0238349437713623, + "objective/train/weights_min": 0.9102352857589722, + "theoretical_loss": 3.786953944659605, + "tokens_seen": 686489600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008, + "loss": 1.8553, + "theoretical_loss": 3.786769821443141, + "tokens_seen": 686817280 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007996789727126806, + "loss": 1.8643, + "theoretical_loss": 3.7861813819259575, + "tokens_seen": 687865856 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.014219271950423717, + "objective/train/docs_used": 398552, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.024502277374268, + "objective/train/original_loss": 4.024501800537109, + "objective/train/theoretical_loss": 3.7860344514298374, + "objective/train/tokens_used": 708588000, + "objective/train/value_avg": -0.0186614990234375, + "objective/train/value_loss": 0.001884474535472691, + "objective/train/value_max": -0.0018177032470703125, + "objective/train/value_min": -0.264404296875, + "objective/train/value_reward_corr": 0.029863778440173603, + "objective/train/value_std": 0.0170135498046875, + "objective/train/weight_avg": 1.0014312267303467, + "objective/train/weighted_lm_loss": 4.029609203338623, + "objective/train/weights_max": 1.0263677835464478, + "objective/train/weights_min": 0.9137383699417114, + "theoretical_loss": 3.7860344514298374, + "tokens_seen": 688128000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007993579454253613, + "loss": 1.8597, + "theoretical_loss": 3.7855940894647278, + "tokens_seen": 688914432 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.011031378991901875, + "objective/train/docs_used": 399765, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.554638385772705, + "objective/train/original_loss": 3.554638624191284, + "objective/train/theoretical_loss": 3.7851177562036957, + "objective/train/tokens_used": 710226400, + "objective/train/value_avg": -0.0136566162109375, + "objective/train/value_loss": 0.00034150780993513763, + "objective/train/value_max": -0.00154876708984375, + "objective/train/value_min": -0.184814453125, + "objective/train/value_reward_corr": 0.06488168886505555, + "objective/train/value_std": 0.0111083984375, + "objective/train/weight_avg": 1.0011048316955566, + "objective/train/weighted_lm_loss": 3.558088779449463, + "objective/train/weights_max": 1.0185779333114624, + "objective/train/weights_min": 0.980776846408844, + "theoretical_loss": 3.7851177562036957, + "tokens_seen": 689766400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007990369181380418, + "loss": 1.9016, + "theoretical_loss": 3.785007940082673, + "tokens_seen": 689963008 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007987158908507224, + "loss": 1.8593, + "theoretical_loss": 3.7844229298228176, + "tokens_seen": 691011584 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.002980219665914774, + "objective/train/docs_used": 400468, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.714526414871216, + "objective/train/original_loss": 3.714526891708374, + "objective/train/theoretical_loss": 3.7842038438510803, + "objective/train/tokens_used": 711864800, + "objective/train/value_avg": -0.025665283203125, + "objective/train/value_loss": 0.004847790580242872, + "objective/train/value_max": -0.001995086669921875, + "objective/train/value_min": -0.362060546875, + "objective/train/value_reward_corr": 0.6709424376030492, + "objective/train/value_std": 0.04290771484375, + "objective/train/weight_avg": 1.0003219842910767, + "objective/train/weighted_lm_loss": 3.716310977935791, + "objective/train/weights_max": 1.0287413597106934, + "objective/train/weights_min": 0.9317183494567871, + "theoretical_loss": 3.7842038438510803, + "tokens_seen": 691404800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007983948635634029, + "loss": 1.8761, + "theoretical_loss": 3.7838390547478635, + "tokens_seen": 692060160 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.008675076998770237, + "objective/train/docs_used": 401917, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.908867359161377, + "objective/train/original_loss": 3.908867597579956, + "objective/train/theoretical_loss": 3.783292699359313, + "objective/train/tokens_used": 713503200, + "objective/train/value_avg": -0.0294036865234375, + "objective/train/value_loss": 0.00540179293602705, + "objective/train/value_max": -0.0017681121826171875, + "objective/train/value_min": -0.376708984375, + "objective/train/value_reward_corr": 0.5756083075708129, + "objective/train/value_std": 0.047821044921875, + "objective/train/weight_avg": 1.0008941888809204, + "objective/train/weighted_lm_loss": 3.912337064743042, + "objective/train/weights_max": 1.0300171375274658, + "objective/train/weights_min": 0.9070391654968262, + "theoretical_loss": 3.783292699359313, + "tokens_seen": 693043200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007980738362760835, + "loss": 1.8768, + "theoretical_loss": 3.78325631094006, + "tokens_seen": 693108736 + }, + { + "epoch": 0.21, + "learning_rate": 0.000797752808988764, + "loss": 1.8477, + "theoretical_loss": 3.782674694501079, + "tokens_seen": 694157312 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0013141011586412787, + "objective/train/docs_used": 402587, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.616726875305176, + "objective/train/original_loss": 3.616727352142334, + "objective/train/theoretical_loss": 3.782384307831949, + "objective/train/tokens_used": 715141600, + "objective/train/value_avg": -0.07708740234375, + "objective/train/value_loss": 0.017228076234459877, + "objective/train/value_max": -0.0015430450439453125, + "objective/train/value_min": -0.81689453125, + "objective/train/value_reward_corr": 0.7697892067426766, + "objective/train/value_std": 0.142333984375, + "objective/train/weight_avg": 1.0002162456512451, + "objective/train/weighted_lm_loss": 3.618403673171997, + "objective/train/weights_max": 1.0408612489700317, + "objective/train/weights_min": 0.9067308902740479, + "theoretical_loss": 3.782384307831949, + "tokens_seen": 694681600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007974317817014446, + "loss": 1.9025, + "theoretical_loss": 3.782094201551887, + "tokens_seen": 695205888 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007971107544141252, + "loss": 1.8629, + "theoretical_loss": 3.7815148282326243, + "tokens_seen": 696254464 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.00600664084777236, + "objective/train/docs_used": 403173, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9560256004333496, + "objective/train/original_loss": 2.9560256004333496, + "objective/train/theoretical_loss": 3.7814786544876062, + "objective/train/tokens_used": 716780000, + "objective/train/value_avg": -0.018768310546875, + "objective/train/value_loss": 0.005704844370484352, + "objective/train/value_max": -0.0018672943115234375, + "objective/train/value_min": -0.292724609375, + "objective/train/value_reward_corr": 0.15180992918401193, + "objective/train/value_std": 0.01763916015625, + "objective/train/weight_avg": 1.0006283521652222, + "objective/train/weighted_lm_loss": 2.957024574279785, + "objective/train/weights_max": 1.0265737771987915, + "objective/train/weights_min": 0.9065344333648682, + "theoretical_loss": 3.7814786544876062, + "tokens_seen": 696320000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007967897271268057, + "loss": 1.8204, + "theoretical_loss": 3.780936570702478, + "tokens_seen": 697303040 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.0016015178989619017, + "objective/train/docs_used": 404489, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.149423837661743, + "objective/train/original_loss": 3.149423599243164, + "objective/train/theoretical_loss": 3.780575724658811, + "objective/train/tokens_used": 718418400, + "objective/train/value_avg": -0.037384033203125, + "objective/train/value_loss": 0.016403429210186005, + "objective/train/value_max": -0.001926422119140625, + "objective/train/value_min": -0.468505859375, + "objective/train/value_reward_corr": 0.4615364888335355, + "objective/train/value_std": 0.051910400390625, + "objective/train/weight_avg": 0.9999201893806458, + "objective/train/weighted_lm_loss": 3.1497457027435303, + "objective/train/weights_max": 1.0394169092178345, + "objective/train/weights_min": 0.9087803363800049, + "theoretical_loss": 3.780575724658811, + "tokens_seen": 697958400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007964686998394863, + "loss": 1.7977, + "theoretical_loss": 3.780359425139562, + "tokens_seen": 698351616 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007961476725521669, + "loss": 1.7955, + "theoretical_loss": 3.7797833877407947, + "tokens_seen": 699400192 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.011129672639071941, + "objective/train/docs_used": 405172, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9106392860412598, + "objective/train/original_loss": 3.9106388092041016, + "objective/train/theoretical_loss": 3.779675503790851, + "objective/train/tokens_used": 720056800, + "objective/train/value_avg": -0.0164031982421875, + "objective/train/value_loss": 0.0012139256577938795, + "objective/train/value_max": -0.00183868408203125, + "objective/train/value_min": -0.31884765625, + "objective/train/value_reward_corr": 0.10151430719628986, + "objective/train/value_std": 0.01433563232421875, + "objective/train/weight_avg": 1.0011188983917236, + "objective/train/weighted_lm_loss": 3.9150359630584717, + "objective/train/weights_max": 1.0323268175125122, + "objective/train/weights_min": 0.9062350988388062, + "theoretical_loss": 3.779675503790851, + "tokens_seen": 699596800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007958266452648475, + "loss": 1.8295, + "theoretical_loss": 3.779208454721779, + "tokens_seen": 700448768 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.01016936544328928, + "objective/train/docs_used": 406046, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.888840436935425, + "objective/train/original_loss": 3.888840675354004, + "objective/train/theoretical_loss": 3.778777977440649, + "objective/train/tokens_used": 721695200, + "objective/train/value_avg": -0.0150604248046875, + "objective/train/value_loss": 0.0019896093290299177, + "objective/train/value_max": -0.001628875732421875, + "objective/train/value_min": -0.22265625, + "objective/train/value_reward_corr": 0.13447694219457357, + "objective/train/value_std": 0.01361083984375, + "objective/train/weight_avg": 1.0010266304016113, + "objective/train/weighted_lm_loss": 3.8926970958709717, + "objective/train/weights_max": 1.0211304426193237, + "objective/train/weights_min": 0.9188173413276672, + "theoretical_loss": 3.778777977440649, + "tokens_seen": 701235200 + }, + { + "epoch": 0.21, + "learning_rate": 0.000795505617977528, + "loss": 1.8597, + "theoretical_loss": 3.7786346223166802, + "tokens_seen": 701497344 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007951845906902087, + "loss": 1.8447, + "theoretical_loss": 3.778061886778111, + "tokens_seen": 702545920 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.04334617033600807, + "objective/train/docs_used": 406800, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8625848293304443, + "objective/train/original_loss": 3.8625848293304443, + "objective/train/theoretical_loss": 3.77788313127565, + "objective/train/tokens_used": 723333600, + "objective/train/value_avg": -0.0283050537109375, + "objective/train/value_loss": 0.049307744950056076, + "objective/train/value_max": -0.0019121170043945312, + "objective/train/value_min": -0.473388671875, + "objective/train/value_reward_corr": 0.5439088228870932, + "objective/train/value_std": 0.034393310546875, + "objective/train/weight_avg": 0.9959053993225098, + "objective/train/weighted_lm_loss": 3.8426077365875244, + "objective/train/weights_max": 1.0318846702575684, + "objective/train/weights_min": 0.9072584509849548, + "theoretical_loss": 3.77788313127565, + "tokens_seen": 702873600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007948635634028893, + "loss": 1.843, + "theoretical_loss": 3.7774902443770113, + "tokens_seen": 703594496 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.0006080878665670753, + "objective/train/docs_used": 408057, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.457519054412842, + "objective/train/original_loss": 3.4575188159942627, + "objective/train/theoretical_loss": 3.7769909510727144, + "objective/train/tokens_used": 724972000, + "objective/train/value_avg": -0.0341796875, + "objective/train/value_loss": 0.008561024442315102, + "objective/train/value_max": -0.0016813278198242188, + "objective/train/value_min": -0.4267578125, + "objective/train/value_reward_corr": 0.6824043800695259, + "objective/train/value_std": 0.051025390625, + "objective/train/weight_avg": 0.9999815225601196, + "objective/train/weighted_lm_loss": 3.458151340484619, + "objective/train/weights_max": 1.0299594402313232, + "objective/train/weights_min": 0.9229084849357605, + "theoretical_loss": 3.7769909510727144, + "tokens_seen": 704512000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007945425361155698, + "loss": 1.8273, + "theoretical_loss": 3.776919691402532, + "tokens_seen": 704643072 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007942215088282504, + "loss": 1.8742, + "theoretical_loss": 3.7763502241619205, + "tokens_seen": 705691648 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.005289024207741022, + "objective/train/docs_used": 408602, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.715012311935425, + "objective/train/original_loss": 3.7150118350982666, + "objective/train/theoretical_loss": 3.7761014227170375, + "objective/train/tokens_used": 726610400, + "objective/train/value_avg": -0.0364990234375, + "objective/train/value_loss": 0.009412446990609169, + "objective/train/value_max": -0.0015850067138671875, + "objective/train/value_min": -0.360595703125, + "objective/train/value_reward_corr": 0.5107059379618767, + "objective/train/value_std": 0.0513916015625, + "objective/train/weight_avg": 0.9995172023773193, + "objective/train/weighted_lm_loss": 3.7131755352020264, + "objective/train/weights_max": 1.0318610668182373, + "objective/train/weights_min": 0.9064109325408936, + "theoretical_loss": 3.7761014227170375, + "tokens_seen": 706150400 + }, + { + "epoch": 0.21, + "learning_rate": 0.000793900481540931, + "loss": 1.8492, + "theoretical_loss": 3.7757818389804023, + "tokens_seen": 706740224 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.008780918084084988, + "objective/train/docs_used": 409727, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.07211971282959, + "objective/train/original_loss": 3.0721194744110107, + "objective/train/theoretical_loss": 3.775214532201071, + "objective/train/tokens_used": 728248800, + "objective/train/value_avg": -0.0152740478515625, + "objective/train/value_loss": 0.0018787677399814129, + "objective/train/value_max": -0.0016422271728515625, + "objective/train/value_min": -0.25537109375, + "objective/train/value_reward_corr": 0.048055799478991176, + "objective/train/value_std": 0.0156402587890625, + "objective/train/weight_avg": 1.000887393951416, + "objective/train/weighted_lm_loss": 3.0744450092315674, + "objective/train/weights_max": 1.025795340538025, + "objective/train/weights_min": 0.9273953437805176, + "theoretical_loss": 3.775214532201071, + "tokens_seen": 707788800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007935794542536116, + "loss": 1.8511, + "theoretical_loss": 3.775214532201071, + "tokens_seen": 707788800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007932584269662921, + "loss": 1.9117, + "theoretical_loss": 3.774648300184772, + "tokens_seen": 708837376 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.0033189000096172094, + "objective/train/docs_used": 410190, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.692988872528076, + "objective/train/original_loss": 3.6929891109466553, + "objective/train/theoretical_loss": 3.7743302656234627, + "objective/train/tokens_used": 729887200, + "objective/train/value_avg": -0.027587890625, + "objective/train/value_loss": 0.014893759042024612, + "objective/train/value_max": -0.001964569091796875, + "objective/train/value_min": -0.33056640625, + "objective/train/value_reward_corr": 0.42150130303903033, + "objective/train/value_std": 0.033233642578125, + "objective/train/weight_avg": 0.9997409582138062, + "objective/train/weighted_lm_loss": 3.6905126571655273, + "objective/train/weights_max": 1.0279462337493896, + "objective/train/weights_min": 0.9072486162185669, + "theoretical_loss": 3.7743302656234627, + "tokens_seen": 709427200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007929373996789727, + "loss": 1.8599, + "theoretical_loss": 3.774083139309993, + "tokens_seen": 709885952 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007926163723916533, + "loss": 1.8882, + "theoretical_loss": 3.7735190459727486, + "tokens_seen": 710934528 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.00042099825805053115, + "objective/train/docs_used": 411414, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9011080265045166, + "objective/train/original_loss": 3.9011082649230957, + "objective/train/theoretical_loss": 3.7734486091880095, + "objective/train/tokens_used": 731525600, + "objective/train/value_avg": -0.019775390625, + "objective/train/value_loss": 0.007427602540701628, + "objective/train/value_max": -0.0014324188232421875, + "objective/train/value_min": -0.43310546875, + "objective/train/value_reward_corr": 0.3486163072215067, + "objective/train/value_std": 0.01971435546875, + "objective/train/weight_avg": 1.0000783205032349, + "objective/train/weighted_lm_loss": 3.900707721710205, + "objective/train/weights_max": 1.0207877159118652, + "objective/train/weights_min": 0.9085108041763306, + "theoretical_loss": 3.7734486091880095, + "tokens_seen": 711065600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007922953451043338, + "loss": 1.8643, + "theoretical_loss": 3.7729560165864746, + "tokens_seen": 711983104 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.001653847168199718, + "objective/train/docs_used": 412148, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9018757343292236, + "objective/train/original_loss": 3.9018757343292236, + "objective/train/theoretical_loss": 3.77256954920262, + "objective/train/tokens_used": 733164000, + "objective/train/value_avg": -0.01763916015625, + "objective/train/value_loss": 0.007213841658085585, + "objective/train/value_max": -0.0017824172973632812, + "objective/train/value_min": -0.29248046875, + "objective/train/value_reward_corr": 0.38314583984743633, + "objective/train/value_std": 0.017669677734375, + "objective/train/weight_avg": 1.000200629234314, + "objective/train/weighted_lm_loss": 3.901994466781616, + "objective/train/weights_max": 1.0228774547576904, + "objective/train/weights_min": 0.907509446144104, + "theoretical_loss": 3.77256954920262, + "tokens_seen": 712704000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007919743178170144, + "loss": 1.86, + "theoretical_loss": 3.7723940475819147, + "tokens_seen": 713031680 + }, + { + "epoch": 0.22, + "learning_rate": 0.000791653290529695, + "loss": 1.843, + "theoretical_loss": 3.7718331354070127, + "tokens_seen": 714080256 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.007118587382137775, + "objective/train/docs_used": 413348, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.000607490539551, + "objective/train/original_loss": 4.000607013702393, + "objective/train/theoretical_loss": 3.7716930720782935, + "objective/train/tokens_used": 734802400, + "objective/train/value_avg": -0.017852783203125, + "objective/train/value_loss": 0.004991526715457439, + "objective/train/value_max": -0.0013513565063476562, + "objective/train/value_min": -0.44677734375, + "objective/train/value_reward_corr": 0.2695558351728706, + "objective/train/value_std": 0.02166748046875, + "objective/train/weight_avg": 1.0007363557815552, + "objective/train/weighted_lm_loss": 4.002736568450928, + "objective/train/weights_max": 1.035159945487976, + "objective/train/weights_min": 0.909859836101532, + "theoretical_loss": 3.7716930720782935, + "tokens_seen": 714342400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007913322632423757, + "loss": 1.8532, + "theoretical_loss": 3.771273276526805, + "tokens_seen": 715128832 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.011581148952245712, + "objective/train/docs_used": 413910, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7971198558807373, + "objective/train/original_loss": 3.797119379043579, + "objective/train/theoretical_loss": 3.770819164328109, + "objective/train/tokens_used": 736440800, + "objective/train/value_avg": -0.022674560546875, + "objective/train/value_loss": 0.00391964940354228, + "objective/train/value_max": -0.0015306472778320312, + "objective/train/value_min": -0.3271484375, + "objective/train/value_reward_corr": 0.2136916078178821, + "objective/train/value_std": 0.0231781005859375, + "objective/train/weight_avg": 1.001177430152893, + "objective/train/weighted_lm_loss": 3.8017232418060303, + "objective/train/weights_max": 1.030633568763733, + "objective/train/weights_min": 0.9093868732452393, + "theoretical_loss": 3.770819164328109, + "tokens_seen": 715980800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007910112359550562, + "loss": 1.8654, + "theoretical_loss": 3.770714467423313, + "tokens_seen": 716177408 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007906902086677368, + "loss": 1.8749, + "theoretical_loss": 3.7701567045954367, + "tokens_seen": 717225984 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.009337998926639557, + "objective/train/docs_used": 414484, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.863219738006592, + "objective/train/original_loss": 3.8632194995880127, + "objective/train/theoretical_loss": 3.769947812566226, + "objective/train/tokens_used": 738079200, + "objective/train/value_avg": -0.018218994140625, + "objective/train/value_loss": 0.0027661838103085756, + "objective/train/value_max": -0.001941680908203125, + "objective/train/value_min": -0.24658203125, + "objective/train/value_reward_corr": 0.10454720678347947, + "objective/train/value_std": 0.0194549560546875, + "objective/train/weight_avg": 1.0009474754333496, + "objective/train/weighted_lm_loss": 3.866755962371826, + "objective/train/weights_max": 1.0234684944152832, + "objective/train/weights_min": 0.9207395315170288, + "theoretical_loss": 3.769947812566226, + "tokens_seen": 717619200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007903691813804174, + "loss": 1.8542, + "theoretical_loss": 3.76959998455885, + "tokens_seen": 718274560 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": -0.00565704982727766, + "objective/train/docs_used": 415542, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.701390266418457, + "objective/train/original_loss": 3.701390027999878, + "objective/train/theoretical_loss": 3.7690790035069, + "objective/train/tokens_used": 739717600, + "objective/train/value_avg": -0.03826904296875, + "objective/train/value_loss": 0.007391328923404217, + "objective/train/value_max": -0.0014553070068359375, + "objective/train/value_min": -0.4345703125, + "objective/train/value_reward_corr": 0.5728188845804918, + "objective/train/value_std": 0.059112548828125, + "objective/train/weight_avg": 0.9994708299636841, + "objective/train/weighted_lm_loss": 3.701373815536499, + "objective/train/weights_max": 1.0384101867675781, + "objective/train/weights_min": 0.9162420630455017, + "theoretical_loss": 3.7690790035069, + "tokens_seen": 719257600 + }, + { + "epoch": 0.22, + "learning_rate": 0.000790048154093098, + "loss": 1.8607, + "theoretical_loss": 3.7690443038458943, + "tokens_seen": 719323136 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007897271268057785, + "loss": 1.8577, + "theoretical_loss": 3.7684896590054757, + "tokens_seen": 720371712 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": -0.0005135367973707616, + "objective/train/docs_used": 416165, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.012828826904297, + "objective/train/original_loss": 4.012828826904297, + "objective/train/theoretical_loss": 3.7682127239635053, + "objective/train/tokens_used": 741356000, + "objective/train/value_avg": -0.0205230712890625, + "objective/train/value_loss": 0.003877673763781786, + "objective/train/value_max": -0.0018177032470703125, + "objective/train/value_min": -0.2105712890625, + "objective/train/value_reward_corr": 0.17110567431427479, + "objective/train/value_std": 0.01654052734375, + "objective/train/weight_avg": 0.9999677538871765, + "objective/train/weighted_lm_loss": 4.016689300537109, + "objective/train/weights_max": 1.0197699069976807, + "objective/train/weights_min": 0.9077101349830627, + "theoretical_loss": 3.7682127239635053, + "tokens_seen": 720896000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007894060995184591, + "loss": 1.872, + "theoretical_loss": 3.767936046602963, + "tokens_seen": 721420288 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007890850722311397, + "loss": 1.7923, + "theoretical_loss": 3.7673834632200824, + "tokens_seen": 722468864 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.008428916335105896, + "objective/train/docs_used": 417606, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6691417694091797, + "objective/train/original_loss": 3.669142007827759, + "objective/train/theoretical_loss": 3.767348960847575, + "objective/train/tokens_used": 742994400, + "objective/train/value_avg": -0.01849365234375, + "objective/train/value_loss": 0.0011167657794430852, + "objective/train/value_max": -0.0012693405151367188, + "objective/train/value_min": -0.3115234375, + "objective/train/value_reward_corr": 0.5840778299152416, + "objective/train/value_std": 0.024505615234375, + "objective/train/weight_avg": 1.000848412513733, + "objective/train/weighted_lm_loss": 3.671909809112549, + "objective/train/weights_max": 1.0305293798446655, + "objective/train/weights_min": 0.9601459503173828, + "theoretical_loss": 3.767348960847575, + "tokens_seen": 722534400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007887640449438202, + "loss": 1.8373, + "theoretical_loss": 3.76683190545482, + "tokens_seen": 723517440 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.018443280830979347, + "objective/train/docs_used": 418215, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.489062786102295, + "objective/train/original_loss": 3.4890623092651367, + "objective/train/theoretical_loss": 3.7664877011678484, + "objective/train/tokens_used": 744632800, + "objective/train/value_avg": -0.051177978515625, + "objective/train/value_loss": 0.004011416807770729, + "objective/train/value_max": -0.0015611648559570312, + "objective/train/value_min": -0.798828125, + "objective/train/value_reward_corr": 0.8396358846203456, + "objective/train/value_std": 0.11151123046875, + "objective/train/weight_avg": 1.0018643140792847, + "objective/train/weighted_lm_loss": 3.4940483570098877, + "objective/train/weights_max": 1.046947717666626, + "objective/train/weights_min": 0.9125372767448425, + "theoretical_loss": 3.7664877011678484, + "tokens_seen": 724172800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007884430176565008, + "loss": 1.8533, + "theoretical_loss": 3.766281369921316, + "tokens_seen": 724566016 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007881219903691814, + "loss": 1.8351, + "theoretical_loss": 3.765731853249771, + "tokens_seen": 725614592 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0035567572340369225, + "objective/train/docs_used": 419550, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.025089740753174, + "objective/train/original_loss": 4.025089263916016, + "objective/train/theoretical_loss": 3.7656289320293315, + "objective/train/tokens_used": 746271200, + "objective/train/value_avg": -0.035888671875, + "objective/train/value_loss": 0.007766738533973694, + "objective/train/value_max": -0.002010345458984375, + "objective/train/value_min": -0.52099609375, + "objective/train/value_reward_corr": 0.7127119147445292, + "objective/train/value_std": 0.05975341796875, + "objective/train/weight_avg": 1.0003941059112549, + "objective/train/weighted_lm_loss": 4.0264081954956055, + "objective/train/weights_max": 1.0364748239517212, + "objective/train/weights_min": 0.9320607781410217, + "theoretical_loss": 3.7656289320293315, + "tokens_seen": 725811200 + }, + { + "epoch": 0.22, + "learning_rate": 0.000787800963081862, + "loss": 1.8388, + "theoretical_loss": 3.7651833520863396, + "tokens_seen": 726663168 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.013025332242250443, + "objective/train/docs_used": 420047, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9944007396698, + "objective/train/original_loss": 3.994400978088379, + "objective/train/theoretical_loss": 3.7647726406323665, + "objective/train/tokens_used": 747909600, + "objective/train/value_avg": -0.01971435546875, + "objective/train/value_loss": 0.002985607832670212, + "objective/train/value_max": -0.0019044876098632812, + "objective/train/value_min": -0.275146484375, + "objective/train/value_reward_corr": 0.16787846189897304, + "objective/train/value_std": 0.019287109375, + "objective/train/weight_avg": 1.0013172626495361, + "objective/train/weighted_lm_loss": 3.9991776943206787, + "objective/train/weights_max": 1.0259932279586792, + "objective/train/weights_min": 0.9272089004516602, + "theoretical_loss": 3.7647726406323665, + "tokens_seen": 727449600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007874799357945426, + "loss": 1.8228, + "theoretical_loss": 3.7646358630930385, + "tokens_seen": 727711744 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007871589085072232, + "loss": 1.8275, + "theoretical_loss": 3.7640893829476445, + "tokens_seen": 728760320 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0024998763110488653, + "objective/train/docs_used": 421294, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.617810010910034, + "objective/train/original_loss": 3.6178104877471924, + "objective/train/theoretical_loss": 3.763918814271718, + "objective/train/tokens_used": 749548000, + "objective/train/value_avg": -0.0224761962890625, + "objective/train/value_loss": 0.006890536285936832, + "objective/train/value_max": -0.00127410888671875, + "objective/train/value_min": -0.281494140625, + "objective/train/value_reward_corr": 0.30675236670680495, + "objective/train/value_std": 0.0254364013671875, + "objective/train/weight_avg": 1.0002835988998413, + "objective/train/weighted_lm_loss": 3.6188442707061768, + "objective/train/weights_max": 1.0283615589141846, + "objective/train/weights_min": 0.911958634853363, + "theoretical_loss": 3.763918814271718, + "tokens_seen": 729088000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007868378812199038, + "loss": 1.8233, + "theoretical_loss": 3.7635439083435998, + "tokens_seen": 729808896 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.013722884468734264, + "objective/train/docs_used": 422072, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4341845512390137, + "objective/train/original_loss": 3.4341845512390137, + "objective/train/theoretical_loss": 3.7630674403356625, + "objective/train/tokens_used": 751186400, + "objective/train/value_avg": -0.020965576171875, + "objective/train/value_loss": 0.0007171121542342007, + "objective/train/value_max": -0.0015974044799804688, + "objective/train/value_min": -0.1837158203125, + "objective/train/value_reward_corr": 0.38082625334524867, + "objective/train/value_std": 0.01947021484375, + "objective/train/weight_avg": 1.0013759136199951, + "objective/train/weighted_lm_loss": 3.439469337463379, + "objective/train/weights_max": 1.017849326133728, + "objective/train/weights_min": 0.9692692756652832, + "theoretical_loss": 3.7630674403356625, + "tokens_seen": 730726400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007865168539325843, + "loss": 1.7924, + "theoretical_loss": 3.762999435989914, + "tokens_seen": 730857472 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007861958266452649, + "loss": 1.8092, + "theoretical_loss": 3.76245596261107, + "tokens_seen": 731906048 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.007487548049539328, + "objective/train/docs_used": 423458, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5978505611419678, + "objective/train/original_loss": 3.5978505611419678, + "objective/train/theoretical_loss": 3.7622185063050937, + "objective/train/tokens_used": 752824800, + "objective/train/value_avg": -0.019073486328125, + "objective/train/value_loss": 0.0022892728447914124, + "objective/train/value_max": -0.0013828277587890625, + "objective/train/value_min": -0.30078125, + "objective/train/value_reward_corr": 0.19761297867857464, + "objective/train/value_std": 0.01849365234375, + "objective/train/weight_avg": 1.0007600784301758, + "objective/train/weighted_lm_loss": 3.599792242050171, + "objective/train/weights_max": 1.029210090637207, + "objective/train/weights_min": 0.9402376413345337, + "theoretical_loss": 3.7622185063050937, + "tokens_seen": 732364800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007858747993579455, + "loss": 1.8152, + "theoretical_loss": 3.7619134849469296, + "tokens_seen": 732954624 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": -0.004883222747594118, + "objective/train/docs_used": 424116, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.648768186569214, + "objective/train/original_loss": 3.6487679481506348, + "objective/train/theoretical_loss": 3.7613719997526367, + "objective/train/tokens_used": 754463200, + "objective/train/value_avg": -0.0246429443359375, + "objective/train/value_loss": 0.008477524854242802, + "objective/train/value_max": -0.001201629638671875, + "objective/train/value_min": -0.64599609375, + "objective/train/value_reward_corr": 0.4386229671386251, + "objective/train/value_std": 0.02911376953125, + "objective/train/weight_avg": 0.999553382396698, + "objective/train/weighted_lm_loss": 3.647590398788452, + "objective/train/weights_max": 1.022060751914978, + "objective/train/weights_min": 0.9079769849777222, + "theoretical_loss": 3.7613719997526367, + "tokens_seen": 734003200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007855537720706261, + "loss": 1.836, + "theoretical_loss": 3.7613719997526367, + "tokens_seen": 734003200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007852327447833066, + "loss": 1.7665, + "theoretical_loss": 3.760831503798527, + "tokens_seen": 735051776 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.010542000643908978, + "objective/train/docs_used": 424834, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.466414451599121, + "objective/train/original_loss": 3.4664146900177, + "objective/train/theoretical_loss": 3.760527908341773, + "objective/train/tokens_used": 756101600, + "objective/train/value_avg": -0.0229339599609375, + "objective/train/value_loss": 0.00575421005487442, + "objective/train/value_max": -0.00131988525390625, + "objective/train/value_min": -0.4658203125, + "objective/train/value_reward_corr": 0.12885428779770786, + "objective/train/value_std": 0.02447509765625, + "objective/train/weight_avg": 1.001082420349121, + "objective/train/weighted_lm_loss": 3.471039295196533, + "objective/train/weights_max": 1.0476065874099731, + "objective/train/weights_min": 0.9161869287490845, + "theoretical_loss": 3.760527908341773, + "tokens_seen": 735641600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007849117174959872, + "loss": 1.8099, + "theoretical_loss": 3.760291993870034, + "tokens_seen": 736100352 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007845906902086678, + "loss": 1.8223, + "theoretical_loss": 3.759753466767597, + "tokens_seen": 737148928 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.007286008447408676, + "objective/train/docs_used": 426204, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.162207841873169, + "objective/train/original_loss": 3.1622071266174316, + "objective/train/theoretical_loss": 3.7596862198259773, + "objective/train/tokens_used": 757740000, + "objective/train/value_avg": -0.038299560546875, + "objective/train/value_loss": 0.009992954321205616, + "objective/train/value_max": -0.0017547607421875, + "objective/train/value_min": -0.40380859375, + "objective/train/value_reward_corr": 0.3806971618560929, + "objective/train/value_std": 0.043121337890625, + "objective/train/weight_avg": 1.0007776021957397, + "objective/train/weighted_lm_loss": 3.1640143394470215, + "objective/train/weights_max": 1.0357609987258911, + "objective/train/weights_min": 0.9071973562240601, + "theoretical_loss": 3.7596862198259773, + "tokens_seen": 737280000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007842696629213483, + "loss": 1.8008, + "theoretical_loss": 3.7592159193065697, + "tokens_seen": 738197504 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.01121287140995264, + "objective/train/docs_used": 426747, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.473106861114502, + "objective/train/original_loss": 3.473106861114502, + "objective/train/theoretical_loss": 3.7588469220478586, + "objective/train/tokens_used": 759378400, + "objective/train/value_avg": -0.0254974365234375, + "objective/train/value_loss": 0.0025315359234809875, + "objective/train/value_max": -0.0018243789672851562, + "objective/train/value_min": -0.275634765625, + "objective/train/value_reward_corr": 0.36125639321643443, + "objective/train/value_std": 0.032318115234375, + "objective/train/weight_avg": 1.001133918762207, + "objective/train/weighted_lm_loss": 3.477895498275757, + "objective/train/weights_max": 1.0245063304901123, + "objective/train/weights_min": 0.9513359069824219, + "theoretical_loss": 3.7588469220478586, + "tokens_seen": 738918400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007839486356340289, + "loss": 1.7764, + "theoretical_loss": 3.758679348317131, + "tokens_seen": 739246080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007836276083467096, + "loss": 1.8078, + "theoretical_loss": 3.7581437506441926, + "tokens_seen": 740294656 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.003267053747549653, + "objective/train/docs_used": 427723, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6133341789245605, + "objective/train/original_loss": 3.6133339405059814, + "objective/train/theoretical_loss": 3.75801000293832, + "objective/train/tokens_used": 761016800, + "objective/train/value_avg": -0.0291748046875, + "objective/train/value_loss": 0.009452927857637405, + "objective/train/value_max": -0.0015125274658203125, + "objective/train/value_min": -0.5126953125, + "objective/train/value_reward_corr": 0.405459388956705, + "objective/train/value_std": 0.035125732421875, + "objective/train/weight_avg": 1.000373125076294, + "objective/train/weighted_lm_loss": 3.6134915351867676, + "objective/train/weights_max": 1.0355881452560425, + "objective/train/weights_min": 0.9069731831550598, + "theoretical_loss": 3.75801000293832, + "tokens_seen": 740556800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007833065810593901, + "loss": 1.8005, + "theoretical_loss": 3.7576091231473114, + "tokens_seen": 741343232 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.008520631119608879, + "objective/train/docs_used": 429091, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0782697200775146, + "objective/train/original_loss": 3.078270196914673, + "objective/train/theoretical_loss": 3.7571754505157218, + "objective/train/tokens_used": 762655200, + "objective/train/value_avg": -0.0243682861328125, + "objective/train/value_loss": 0.006965545006096363, + "objective/train/value_max": -0.0018100738525390625, + "objective/train/value_min": -0.59912109375, + "objective/train/value_reward_corr": 0.39145756689058403, + "objective/train/value_std": 0.02886962890625, + "objective/train/weight_avg": 1.000886082649231, + "objective/train/weighted_lm_loss": 3.080050230026245, + "objective/train/weights_max": 1.0411971807479858, + "objective/train/weights_min": 0.9069422483444214, + "theoretical_loss": 3.7571754505157218, + "tokens_seen": 742195200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007829855537720706, + "loss": 1.8224, + "theoretical_loss": 3.7570754627006018, + "tokens_seen": 742391808 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007826645264847512, + "loss": 1.8418, + "theoretical_loss": 3.756542766192646, + "tokens_seen": 743440384 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.012945657595992088, + "objective/train/docs_used": 429832, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8507235050201416, + "objective/train/original_loss": 3.8507232666015625, + "objective/train/theoretical_loss": 3.756343252885055, + "objective/train/tokens_used": 764293600, + "objective/train/value_avg": -0.0279693603515625, + "objective/train/value_loss": 0.004048232454806566, + "objective/train/value_max": -0.0018749237060546875, + "objective/train/value_min": -0.362548828125, + "objective/train/value_reward_corr": 0.2632539982786328, + "objective/train/value_std": 0.029327392578125, + "objective/train/weight_avg": 1.0013145208358765, + "objective/train/weighted_lm_loss": 3.857090711593628, + "objective/train/weights_max": 1.0367258787155151, + "objective/train/weights_min": 0.907709002494812, + "theoretical_loss": 3.756343252885055, + "tokens_seen": 743833600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007823434991974318, + "loss": 1.7971, + "theoretical_loss": 3.7560110305264054, + "tokens_seen": 744488960 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.009266621433198452, + "objective/train/docs_used": 430489, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3915750980377197, + "objective/train/original_loss": 3.391575336456299, + "objective/train/theoretical_loss": 3.755513398237129, + "objective/train/tokens_used": 765932000, + "objective/train/value_avg": -0.0191497802734375, + "objective/train/value_loss": 0.001840459299273789, + "objective/train/value_max": -0.0017681121826171875, + "objective/train/value_min": -0.3251953125, + "objective/train/value_reward_corr": 0.209239171344741, + "objective/train/value_std": 0.0186004638671875, + "objective/train/weight_avg": 1.0009357929229736, + "objective/train/weighted_lm_loss": 3.3944199085235596, + "objective/train/weights_max": 1.0220648050308228, + "objective/train/weights_min": 0.9302854537963867, + "theoretical_loss": 3.755513398237129, + "tokens_seen": 745472000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007820224719101123, + "loss": 1.7865, + "theoretical_loss": 3.7554802526191393, + "tokens_seen": 745537536 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007817014446227929, + "loss": 1.8335, + "theoretical_loss": 3.7549504294023137, + "tokens_seen": 746586112 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.007484976667910814, + "objective/train/docs_used": 431396, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.280696153640747, + "objective/train/original_loss": 3.280696153640747, + "objective/train/theoretical_loss": 3.7546858748477634, + "objective/train/tokens_used": 767570400, + "objective/train/value_avg": -0.023040771484375, + "objective/train/value_loss": 0.004288068972527981, + "objective/train/value_max": -0.0017337799072265625, + "objective/train/value_min": -0.77490234375, + "objective/train/value_reward_corr": 0.34808044613921985, + "objective/train/value_std": 0.024871826171875, + "objective/train/weight_avg": 1.0007696151733398, + "objective/train/weighted_lm_loss": 3.2834975719451904, + "objective/train/weights_max": 1.0333362817764282, + "objective/train/weights_min": 0.9145675301551819, + "theoretical_loss": 3.7546858748477634, + "tokens_seen": 747110400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007813804173354735, + "loss": 1.8396, + "theoretical_loss": 3.7544215578215177, + "tokens_seen": 747634688 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007810593900481541, + "loss": 1.8227, + "theoretical_loss": 3.75389363483638, + "tokens_seen": 748683264 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.010370293632149696, + "objective/train/docs_used": 432599, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.792257070541382, + "objective/train/original_loss": 3.79225754737854, + "objective/train/theoretical_loss": 3.7538606710769904, + "objective/train/tokens_used": 769208800, + "objective/train/value_avg": -0.0207977294921875, + "objective/train/value_loss": 0.002479627262800932, + "objective/train/value_max": -0.0017480850219726562, + "objective/train/value_min": -0.51806640625, + "objective/train/value_reward_corr": 0.31863713668475224, + "objective/train/value_std": 0.0214385986328125, + "objective/train/weight_avg": 1.0010493993759155, + "objective/train/weighted_lm_loss": 3.7961795330047607, + "objective/train/weights_max": 1.0403306484222412, + "objective/train/weights_min": 0.9373089671134949, + "theoretical_loss": 3.7538606710769904, + "tokens_seen": 748748800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007807383627608346, + "loss": 1.847, + "theoretical_loss": 3.753366657420483, + "tokens_seen": 749731840 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0049913362599909306, + "objective/train/docs_used": 433353, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5167930126190186, + "objective/train/original_loss": 3.5167927742004395, + "objective/train/theoretical_loss": 3.7530377753682695, + "objective/train/tokens_used": 770847200, + "objective/train/value_avg": -0.027740478515625, + "objective/train/value_loss": 0.006480441428720951, + "objective/train/value_max": -0.00147247314453125, + "objective/train/value_min": -0.4814453125, + "objective/train/value_reward_corr": 0.7032000433700328, + "objective/train/value_std": 0.05810546875, + "objective/train/weight_avg": 1.0005310773849487, + "objective/train/weighted_lm_loss": 3.518812894821167, + "objective/train/weights_max": 1.0410645008087158, + "objective/train/weights_min": 0.9155840873718262, + "theoretical_loss": 3.7530377753682695, + "tokens_seen": 750387200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007804173354735152, + "loss": 1.8455, + "theoretical_loss": 3.75284062256128, + "tokens_seen": 750780416 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007800963081861958, + "loss": 1.8355, + "theoretical_loss": 3.7523155272600137, + "tokens_seen": 751828992 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.008297768421471119, + "objective/train/docs_used": 434593, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.244894027709961, + "objective/train/original_loss": 4.244894027709961, + "objective/train/theoretical_loss": 3.752217176247707, + "objective/train/tokens_used": 772485600, + "objective/train/value_avg": -0.024200439453125, + "objective/train/value_loss": 0.003134598955512047, + "objective/train/value_max": -0.0011835098266601562, + "objective/train/value_min": -0.499755859375, + "objective/train/value_reward_corr": 0.4751560733634076, + "objective/train/value_std": 0.0295867919921875, + "objective/train/weight_avg": 1.0008453130722046, + "objective/train/weighted_lm_loss": 4.248797416687012, + "objective/train/weights_max": 1.0295569896697998, + "objective/train/weights_min": 0.9380462169647217, + "theoretical_loss": 3.752217176247707, + "tokens_seen": 752025600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007797752808988764, + "loss": 1.8571, + "theoretical_loss": 3.751791368531631, + "tokens_seen": 752877568 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.01464638952165842, + "objective/train/docs_used": 435245, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.173969268798828, + "objective/train/original_loss": 4.173969268798828, + "objective/train/theoretical_loss": 3.7513988623232883, + "objective/train/tokens_used": 774124000, + "objective/train/value_avg": -0.026763916015625, + "objective/train/value_loss": 0.0034004847984761, + "objective/train/value_max": -0.0013723373413085938, + "objective/train/value_min": -0.3671875, + "objective/train/value_reward_corr": 0.4171760569520776, + "objective/train/value_std": 0.037628173828125, + "objective/train/weight_avg": 1.0014814138412476, + "objective/train/weighted_lm_loss": 4.180362224578857, + "objective/train/weights_max": 1.033830165863037, + "objective/train/weights_min": 0.912857174873352, + "theoretical_loss": 3.7513988623232883, + "tokens_seen": 753664000 + }, + { + "epoch": 0.23, + "learning_rate": 0.000779454253611557, + "loss": 1.8508, + "theoretical_loss": 3.7512681434047033, + "tokens_seen": 753926144 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007791332263242376, + "loss": 1.8474, + "theoretical_loss": 3.7507458489213477, + "tokens_seen": 754974720 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.004775169305503368, + "objective/train/docs_used": 436337, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.443742275238037, + "objective/train/original_loss": 3.443742513656616, + "objective/train/theoretical_loss": 3.7505828222841155, + "objective/train/tokens_used": 775762400, + "objective/train/value_avg": -0.0288848876953125, + "objective/train/value_loss": 0.003413571510463953, + "objective/train/value_max": -0.0016489028930664062, + "objective/train/value_min": -0.51123046875, + "objective/train/value_reward_corr": 0.7674002025132293, + "objective/train/value_std": 0.058258056640625, + "objective/train/weight_avg": 1.0004944801330566, + "objective/train/weighted_lm_loss": 3.4465136528015137, + "objective/train/weights_max": 1.0422552824020386, + "objective/train/weights_min": 0.9518956542015076, + "theoretical_loss": 3.7505828222841155, + "tokens_seen": 755302400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007788121990369182, + "loss": 1.821, + "theoretical_loss": 3.7502244821371407, + "tokens_seen": 756023296 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": -0.00870074238628149, + "objective/train/docs_used": 436904, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.113287925720215, + "objective/train/original_loss": 4.113287925720215, + "objective/train/theoretical_loss": 3.7497690448996552, + "objective/train/tokens_used": 777400800, + "objective/train/value_avg": -0.030487060546875, + "objective/train/value_loss": 0.022679606452584267, + "objective/train/value_max": -0.0017614364624023438, + "objective/train/value_min": -0.76904296875, + "objective/train/value_reward_corr": 0.3220205612721618, + "objective/train/value_std": 0.03204345703125, + "objective/train/weight_avg": 0.9992406368255615, + "objective/train/weighted_lm_loss": 4.107402324676514, + "objective/train/weights_max": 1.0401051044464111, + "objective/train/weights_min": 0.9085322618484497, + "theoretical_loss": 3.7497690448996552, + "tokens_seen": 756940800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007784911717495987, + "loss": 1.8608, + "theoretical_loss": 3.7497040401210446, + "tokens_seen": 757071872 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007781701444622793, + "loss": 1.8026, + "theoretical_loss": 3.7491845199553238, + "tokens_seen": 758120448 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.004041209351271391, + "objective/train/docs_used": 438157, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.791341781616211, + "objective/train/original_loss": 3.791341781616211, + "objective/train/theoretical_loss": 3.7489575190189965, + "objective/train/tokens_used": 779039200, + "objective/train/value_avg": -0.01641845703125, + "objective/train/value_loss": 0.0038335241843014956, + "objective/train/value_max": -0.0011882781982421875, + "objective/train/value_min": -0.2763671875, + "objective/train/value_reward_corr": 0.3060221498889168, + "objective/train/value_std": 0.0174407958984375, + "objective/train/weight_avg": 1.0004229545593262, + "objective/train/weighted_lm_loss": 3.792457342147827, + "objective/train/weights_max": 1.024115800857544, + "objective/train/weights_min": 0.912951648235321, + "theoretical_loss": 3.7489575190189965, + "tokens_seen": 758579200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007778491171749599, + "loss": 1.8239, + "theoretical_loss": 3.748665918735468, + "tokens_seen": 759169024 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": -0.013343006372451782, + "objective/train/docs_used": 438864, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1701533794403076, + "objective/train/original_loss": 3.1701529026031494, + "objective/train/theoretical_loss": 3.748148233570115, + "objective/train/tokens_used": 780677600, + "objective/train/value_avg": -0.03216552734375, + "objective/train/value_loss": 0.015594160184264183, + "objective/train/value_max": -0.0012445449829101562, + "objective/train/value_min": -0.630859375, + "objective/train/value_reward_corr": 0.6662650057343928, + "objective/train/value_std": 0.043426513671875, + "objective/train/weight_avg": 0.9987419247627258, + "objective/train/weighted_lm_loss": 3.1705586910247803, + "objective/train/weights_max": 1.0279573202133179, + "objective/train/weights_min": 0.9147488474845886, + "theoretical_loss": 3.748148233570115, + "tokens_seen": 760217600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007775280898876405, + "loss": 1.7957, + "theoretical_loss": 3.748148233570115, + "tokens_seen": 760217600 + }, + { + "epoch": 0.23, + "learning_rate": 0.000777207062600321, + "loss": 1.7833, + "theoretical_loss": 3.74763146158097, + "tokens_seen": 761266176 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.017398551106452942, + "objective/train/docs_used": 439620, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2530415058135986, + "objective/train/original_loss": 3.2530417442321777, + "objective/train/theoretical_loss": 3.7473411775591465, + "objective/train/tokens_used": 782316000, + "objective/train/value_avg": -0.02166748046875, + "objective/train/value_loss": 0.0010133241303265095, + "objective/train/value_max": -0.0018825531005859375, + "objective/train/value_min": -0.36669921875, + "objective/train/value_reward_corr": 0.24297760460625809, + "objective/train/value_std": 0.02093505859375, + "objective/train/weight_avg": 1.0017448663711548, + "objective/train/weighted_lm_loss": 3.2585549354553223, + "objective/train/weights_max": 1.0356709957122803, + "objective/train/weights_min": 0.9662796854972839, + "theoretical_loss": 3.7473411775591465, + "tokens_seen": 761856000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007768860353130016, + "loss": 1.7733, + "theoretical_loss": 3.747115599902733, + "tokens_seen": 762314752 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007765650080256822, + "loss": 1.8129, + "theoretical_loss": 3.746600645683017, + "tokens_seen": 763363328 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0026009329594671726, + "objective/train/docs_used": 440741, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.563310384750366, + "objective/train/original_loss": 3.563309907913208, + "objective/train/theoretical_loss": 3.7465363400696683, + "objective/train/tokens_used": 783954400, + "objective/train/value_avg": -0.027923583984375, + "objective/train/value_loss": 0.007115010172128677, + "objective/train/value_max": -0.0016813278198242188, + "objective/train/value_min": -0.61767578125, + "objective/train/value_reward_corr": 0.551145056117814, + "objective/train/value_std": 0.043701171875, + "objective/train/weight_avg": 1.0002950429916382, + "objective/train/weighted_lm_loss": 3.56512451171875, + "objective/train/weights_max": 1.040624737739563, + "objective/train/weights_min": 0.9125585556030273, + "theoretical_loss": 3.7465363400696683, + "tokens_seen": 763494400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007762439807383627, + "loss": 1.7931, + "theoretical_loss": 3.7460865960822782, + "tokens_seen": 764411904 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.008193270303308964, + "objective/train/docs_used": 441342, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3983542919158936, + "objective/train/original_loss": 3.3983545303344727, + "objective/train/theoretical_loss": 3.7457337102619888, + "objective/train/tokens_used": 785592800, + "objective/train/value_avg": -0.0258331298828125, + "objective/train/value_loss": 0.0019205660792067647, + "objective/train/value_max": -0.0013885498046875, + "objective/train/value_min": -0.288330078125, + "objective/train/value_reward_corr": 0.5444572323420125, + "objective/train/value_std": 0.0301513671875, + "objective/train/weight_avg": 1.0008288621902466, + "objective/train/weighted_lm_loss": 3.401627779006958, + "objective/train/weights_max": 1.025763750076294, + "objective/train/weights_min": 0.9258610606193542, + "theoretical_loss": 3.7457337102619888, + "tokens_seen": 765132800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007759229534510433, + "loss": 1.7789, + "theoretical_loss": 3.745573448273736, + "tokens_seen": 765460480 + }, + { + "epoch": 0.23, + "learning_rate": 0.000775601926163724, + "loss": 1.7813, + "theoretical_loss": 3.7450611994433, + "tokens_seen": 766509056 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.011955822817981243, + "objective/train/docs_used": 442736, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.943802833557129, + "objective/train/original_loss": 2.943802833557129, + "objective/train/theoretical_loss": 3.7449332773724455, + "objective/train/tokens_used": 787231200, + "objective/train/value_avg": -0.028411865234375, + "objective/train/value_loss": 0.002453274792060256, + "objective/train/value_max": -0.001949310302734375, + "objective/train/value_min": -0.368408203125, + "objective/train/value_reward_corr": 0.5129516979160783, + "objective/train/value_std": 0.035797119140625, + "objective/train/weight_avg": 1.0012078285217285, + "objective/train/weighted_lm_loss": 2.9476890563964844, + "objective/train/weights_max": 1.027849555015564, + "objective/train/weights_min": 0.9433425068855286, + "theoretical_loss": 3.7449332773724455, + "tokens_seen": 766771200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007752808988764046, + "loss": 1.7978, + "theoretical_loss": 3.7445498467894947, + "tokens_seen": 767557632 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.013522540219128132, + "objective/train/docs_used": 443303, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.361311912536621, + "objective/train/original_loss": 3.361311912536621, + "objective/train/theoretical_loss": 3.7441350307127115, + "objective/train/tokens_used": 788869600, + "objective/train/value_avg": -0.03997802734375, + "objective/train/value_loss": 0.006029500160366297, + "objective/train/value_max": -0.00131988525390625, + "objective/train/value_min": -0.61181640625, + "objective/train/value_reward_corr": 0.5473442590149351, + "objective/train/value_std": 0.07196044921875, + "objective/train/weight_avg": 1.0013819932937622, + "objective/train/weighted_lm_loss": 3.3647713661193848, + "objective/train/weights_max": 1.0418386459350586, + "objective/train/weights_min": 0.9194912314414978, + "theoretical_loss": 3.7441350307127115, + "tokens_seen": 768409600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007749598715890851, + "loss": 1.7648, + "theoretical_loss": 3.7440393875233893, + "tokens_seen": 768606208 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007746388443017657, + "loss": 1.7772, + "theoretical_loss": 3.7435298188685184, + "tokens_seen": 769654784 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.008551796898245811, + "objective/train/docs_used": 444724, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.687131881713867, + "objective/train/original_loss": 3.687131404876709, + "objective/train/theoretical_loss": 3.7433389596691073, + "objective/train/tokens_used": 790508000, + "objective/train/value_avg": -0.0273895263671875, + "objective/train/value_loss": 0.007169385440647602, + "objective/train/value_max": -0.0011339187622070312, + "objective/train/value_min": -0.72265625, + "objective/train/value_reward_corr": 0.4369654935945212, + "objective/train/value_std": 0.029571533203125, + "objective/train/weight_avg": 1.0008902549743652, + "objective/train/weighted_lm_loss": 3.689711332321167, + "objective/train/weights_max": 1.0419970750808716, + "objective/train/weights_min": 0.9093888998031616, + "theoretical_loss": 3.7433389596691073, + "tokens_seen": 770048000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007743178170144463, + "loss": 1.7478, + "theoretical_loss": 3.7430211380608167, + "tokens_seen": 770703360 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": -0.002958221361041069, + "objective/train/docs_used": 445347, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.744248867034912, + "objective/train/original_loss": 3.744248628616333, + "objective/train/theoretical_loss": 3.7425450537019223, + "objective/train/tokens_used": 792146400, + "objective/train/value_avg": -0.0287628173828125, + "objective/train/value_loss": 0.01292910985648632, + "objective/train/value_max": -0.0011692047119140625, + "objective/train/value_min": -0.54931640625, + "objective/train/value_reward_corr": 0.46115640797784274, + "objective/train/value_std": 0.041473388671875, + "objective/train/weight_avg": 0.9997673630714417, + "objective/train/weighted_lm_loss": 3.743328809738159, + "objective/train/weights_max": 1.0304392576217651, + "objective/train/weights_min": 0.9065282344818115, + "theoretical_loss": 3.7425450537019223, + "tokens_seen": 771686400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007739967897271268, + "loss": 1.7958, + "theoretical_loss": 3.74251334234854, + "tokens_seen": 771751936 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007736757624398074, + "loss": 1.7706, + "theoretical_loss": 3.742006428992198, + "tokens_seen": 772800512 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.00826406478881836, + "objective/train/docs_used": 446623, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3706417083740234, + "objective/train/original_loss": 3.3706412315368652, + "objective/train/theoretical_loss": 3.7417533023447445, + "objective/train/tokens_used": 793784800, + "objective/train/value_avg": -0.02069091796875, + "objective/train/value_loss": 0.0027734050527215004, + "objective/train/value_max": -0.0016813278198242188, + "objective/train/value_min": -0.4306640625, + "objective/train/value_reward_corr": 0.3227578347214723, + "objective/train/value_std": 0.02685546875, + "objective/train/weight_avg": 1.0008400678634644, + "objective/train/weighted_lm_loss": 3.3742048740386963, + "objective/train/weights_max": 1.042900800704956, + "objective/train/weights_min": 0.9189751148223877, + "theoretical_loss": 3.7417533023447445, + "tokens_seen": 773324800 + }, + { + "epoch": 0.23, + "learning_rate": 0.000773354735152488, + "loss": 1.7815, + "theoretical_loss": 3.741500395264481, + "tokens_seen": 773849088 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007730337078651686, + "loss": 1.7876, + "theoretical_loss": 3.7409952384501923, + "tokens_seen": 774897664 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.011669809930026531, + "objective/train/docs_used": 447277, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5293617248535156, + "objective/train/original_loss": 3.5293617248535156, + "objective/train/theoretical_loss": 3.7409636952037957, + "objective/train/tokens_used": 795423200, + "objective/train/value_avg": -0.033966064453125, + "objective/train/value_loss": 0.003229996655136347, + "objective/train/value_max": -0.0017414093017578125, + "objective/train/value_min": -0.458251953125, + "objective/train/value_reward_corr": 0.6673955648892279, + "objective/train/value_std": 0.045379638671875, + "objective/train/weight_avg": 1.0011831521987915, + "objective/train/weighted_lm_loss": 3.534687042236328, + "objective/train/weights_max": 1.0467920303344727, + "objective/train/weights_min": 0.9471722841262817, + "theoretical_loss": 3.7409636952037957, + "tokens_seen": 774963200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007727126805778491, + "loss": 1.7809, + "theoretical_loss": 3.740490955846173, + "tokens_seen": 775946240 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.005143603775650263, + "objective/train/docs_used": 447858, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.930225372314453, + "objective/train/original_loss": 3.930225372314453, + "objective/train/theoretical_loss": 3.7401762219572765, + "objective/train/tokens_used": 797061600, + "objective/train/value_avg": -0.0223236083984375, + "objective/train/value_loss": 0.006650871597230434, + "objective/train/value_max": -0.0015306472778320312, + "objective/train/value_min": -0.6728515625, + "objective/train/value_reward_corr": 0.5026031780688057, + "objective/train/value_std": 0.032867431640625, + "objective/train/weight_avg": 1.000546932220459, + "objective/train/weighted_lm_loss": 3.931741237640381, + "objective/train/weights_max": 1.0332099199295044, + "objective/train/weights_min": 0.9108349680900574, + "theoretical_loss": 3.7401762219572765, + "tokens_seen": 776601600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007723916532905297, + "loss": 1.7323, + "theoretical_loss": 3.739987544761238, + "tokens_seen": 776994816 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007720706260032103, + "loss": 1.762, + "theoretical_loss": 3.7394850025161026, + "tokens_seen": 778043392 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.00035903751268051565, + "objective/train/docs_used": 448758, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8526761531829834, + "objective/train/original_loss": 3.8526761531829834, + "objective/train/theoretical_loss": 3.739390872354716, + "objective/train/tokens_used": 798700000, + "objective/train/value_avg": -0.027130126953125, + "objective/train/value_loss": 0.006957345642149448, + "objective/train/value_max": -0.0011835098266601562, + "objective/train/value_min": -0.346435546875, + "objective/train/value_reward_corr": 0.5036072563740318, + "objective/train/value_std": 0.040374755859375, + "objective/train/weight_avg": 1.0000702142715454, + "objective/train/weighted_lm_loss": 3.8516647815704346, + "objective/train/weights_max": 1.030701756477356, + "objective/train/weights_min": 0.9153907895088196, + "theoretical_loss": 3.739390872354716, + "tokens_seen": 778240000 + }, + { + "epoch": 0.24, + "learning_rate": 0.000771749598715891, + "loss": 1.7502, + "theoretical_loss": 3.738983326443316, + "tokens_seen": 779091968 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.008509553968906403, + "objective/train/docs_used": 449318, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.023716926574707, + "objective/train/original_loss": 4.023717403411865, + "objective/train/theoretical_loss": 3.73860763621633, + "objective/train/tokens_used": 800338400, + "objective/train/value_avg": -0.02099609375, + "objective/train/value_loss": 0.005560053512454033, + "objective/train/value_max": -0.001094818115234375, + "objective/train/value_min": -0.4736328125, + "objective/train/value_reward_corr": 0.2411735369386486, + "objective/train/value_std": 0.0207366943359375, + "objective/train/weight_avg": 1.0008782148361206, + "objective/train/weighted_lm_loss": 4.027108192443848, + "objective/train/weights_max": 1.045078158378601, + "objective/train/weights_min": 0.9092631936073303, + "theoretical_loss": 3.73860763621633, + "tokens_seen": 779878400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007714285714285715, + "loss": 1.8459, + "theoretical_loss": 3.7384825138871944, + "tokens_seen": 780140544 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007711075441412521, + "loss": 1.7967, + "theoretical_loss": 3.737982562203749, + "tokens_seen": 781189120 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.007374615408480167, + "objective/train/docs_used": 450726, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.857198715209961, + "objective/train/original_loss": 2.85719895362854, + "objective/train/theoretical_loss": 3.7378265034323865, + "objective/train/tokens_used": 801976800, + "objective/train/value_avg": -0.0165557861328125, + "objective/train/value_loss": 0.0015551152173429728, + "objective/train/value_max": -0.001483917236328125, + "objective/train/value_min": -0.310546875, + "objective/train/value_reward_corr": 0.1658725871242432, + "objective/train/value_std": 0.01383209228515625, + "objective/train/weight_avg": 1.000745177268982, + "objective/train/weighted_lm_loss": 2.859328269958496, + "objective/train/weights_max": 1.03147292137146, + "objective/train/weights_min": 0.9176470637321472, + "theoretical_loss": 3.7378265034323865, + "tokens_seen": 781516800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007707865168539327, + "loss": 1.7929, + "theoretical_loss": 3.737483468760624, + "tokens_seen": 782237696 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": -0.02826501987874508, + "objective/train/docs_used": 451239, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.819281816482544, + "objective/train/original_loss": 3.8192813396453857, + "objective/train/theoretical_loss": 3.737047463962579, + "objective/train/tokens_used": 803615200, + "objective/train/value_avg": -0.06414794921875, + "objective/train/value_loss": 0.009701349772512913, + "objective/train/value_max": -0.001674652099609375, + "objective/train/value_min": -0.54296875, + "objective/train/value_reward_corr": 0.9424002626226801, + "objective/train/value_std": 0.11798095703125, + "objective/train/weight_avg": 0.9972215890884399, + "objective/train/weighted_lm_loss": 3.8052291870117188, + "objective/train/weights_max": 1.024168610572815, + "objective/train/weights_min": 0.9315828084945679, + "theoretical_loss": 3.737047463962579, + "tokens_seen": 783155200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007704654895666132, + "loss": 1.7704, + "theoretical_loss": 3.7369852309370275, + "tokens_seen": 783286272 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007701444622792938, + "loss": 1.7836, + "theoretical_loss": 3.736487846123663, + "tokens_seen": 784334848 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.007198237348347902, + "objective/train/docs_used": 452482, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3947293758392334, + "objective/train/original_loss": 3.3947298526763916, + "objective/train/theoretical_loss": 3.736270507835404, + "objective/train/tokens_used": 805253600, + "objective/train/value_avg": -0.015655517578125, + "objective/train/value_loss": 0.00201622792519629, + "objective/train/value_max": -0.0016937255859375, + "objective/train/value_min": -0.346435546875, + "objective/train/value_reward_corr": 0.17544466959271404, + "objective/train/value_std": 0.01308441162109375, + "objective/train/weight_avg": 1.0007297992706299, + "objective/train/weighted_lm_loss": 3.397176504135132, + "objective/train/weights_max": 1.0317641496658325, + "objective/train/weights_min": 0.9114437103271484, + "theoretical_loss": 3.736270507835404, + "tokens_seen": 784793600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007698234349919744, + "loss": 1.7648, + "theoretical_loss": 3.7359913117226684, + "tokens_seen": 785383424 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.00348873483017087, + "objective/train/docs_used": 453141, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.539799213409424, + "objective/train/original_loss": 3.539799690246582, + "objective/train/theoretical_loss": 3.735495625147548, + "objective/train/tokens_used": 806892000, + "objective/train/value_avg": -0.0185394287109375, + "objective/train/value_loss": 0.0011187737109139562, + "objective/train/value_max": -0.0011739730834960938, + "objective/train/value_min": -0.1610107421875, + "objective/train/value_reward_corr": 0.4528478397105908, + "objective/train/value_std": 0.0164337158203125, + "objective/train/weight_avg": 1.000354528427124, + "objective/train/weighted_lm_loss": 3.542569160461426, + "objective/train/weights_max": 1.0159767866134644, + "objective/train/weights_min": 0.9834701418876648, + "theoretical_loss": 3.735495625147548, + "tokens_seen": 786432000 + }, + { + "epoch": 0.24, + "learning_rate": 0.000769502407704655, + "loss": 1.767, + "theoretical_loss": 3.735495625147548, + "tokens_seen": 786432000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007691813804173355, + "loss": 1.7508, + "theoretical_loss": 3.735000783823107, + "tokens_seen": 787480576 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.010511333122849464, + "objective/train/docs_used": 453953, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.663240671157837, + "objective/train/original_loss": 3.663240909576416, + "objective/train/theoretical_loss": 3.7347228060632798, + "objective/train/tokens_used": 808530400, + "objective/train/value_avg": -0.0227203369140625, + "objective/train/value_loss": 0.0024958893191069365, + "objective/train/value_max": -0.0013151168823242188, + "objective/train/value_min": -0.371826171875, + "objective/train/value_reward_corr": 0.3641355819921661, + "objective/train/value_std": 0.035400390625, + "objective/train/weight_avg": 1.001063585281372, + "objective/train/weighted_lm_loss": 3.6675572395324707, + "objective/train/weights_max": 1.0335408449172974, + "objective/train/weights_min": 0.9454513788223267, + "theoretical_loss": 3.7347228060632798, + "tokens_seen": 788070400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007688603531300161, + "loss": 1.7472, + "theoretical_loss": 3.7345067851853897, + "tokens_seen": 788529152 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007685393258426966, + "loss": 1.7533, + "theoretical_loss": 3.7340136266816133, + "tokens_seen": 789577728 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.008950280956923962, + "objective/train/docs_used": 455438, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3924152851104736, + "objective/train/original_loss": 3.3924152851104736, + "objective/train/theoretical_loss": 3.7339520408138513, + "objective/train/tokens_used": 810168800, + "objective/train/value_avg": -0.020782470703125, + "objective/train/value_loss": 0.0007739263819530606, + "objective/train/value_max": -0.0012159347534179688, + "objective/train/value_min": -0.37060546875, + "objective/train/value_reward_corr": 0.4711772481001281, + "objective/train/value_std": 0.01971435546875, + "objective/train/weight_avg": 1.000898838043213, + "objective/train/weighted_lm_loss": 3.395549774169922, + "objective/train/weights_max": 1.0376759767532349, + "objective/train/weights_min": 0.9691603779792786, + "theoretical_loss": 3.7339520408138513, + "tokens_seen": 789708800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007682182985553771, + "loss": 1.7433, + "theoretical_loss": 3.733521305770105, + "tokens_seen": 790626304 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.01506956946104765, + "objective/train/docs_used": 456184, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.44697904586792, + "objective/train/original_loss": 3.446979284286499, + "objective/train/theoretical_loss": 3.733183319696901, + "objective/train/tokens_used": 811807200, + "objective/train/value_avg": -0.0204620361328125, + "objective/train/value_loss": 0.0011870579328387976, + "objective/train/value_max": -0.0013303756713867188, + "objective/train/value_min": -0.322021484375, + "objective/train/value_reward_corr": 0.1045823442290892, + "objective/train/value_std": 0.0179443359375, + "objective/train/weight_avg": 1.0015127658843994, + "objective/train/weighted_lm_loss": 3.4519476890563965, + "objective/train/weights_max": 1.0326449871063232, + "objective/train/weights_min": 0.9422342777252197, + "theoretical_loss": 3.733183319696901, + "tokens_seen": 791347200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007678972712680577, + "loss": 1.7522, + "theoretical_loss": 3.7330298199202394, + "tokens_seen": 791674880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007675762439807383, + "loss": 1.7502, + "theoretical_loss": 3.7325391666123764, + "tokens_seen": 792723456 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.010029459372162819, + "objective/train/docs_used": 457386, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5913658142089844, + "objective/train/original_loss": 3.5913658142089844, + "objective/train/theoretical_loss": 3.732416633075869, + "objective/train/tokens_used": 813445600, + "objective/train/value_avg": -0.0243072509765625, + "objective/train/value_loss": 0.0015828423202037811, + "objective/train/value_max": -0.0009813308715820312, + "objective/train/value_min": -0.289306640625, + "objective/train/value_reward_corr": 0.25036657829642284, + "objective/train/value_std": 0.0223846435546875, + "objective/train/weight_avg": 1.001010775566101, + "objective/train/weighted_lm_loss": 3.5954010486602783, + "objective/train/weights_max": 1.0292879343032837, + "objective/train/weights_min": 0.9570199847221375, + "theoretical_loss": 3.732416633075869, + "tokens_seen": 792985600 + }, + { + "epoch": 0.24, + "learning_rate": 0.000767255216693419, + "loss": 1.7448, + "theoretical_loss": 3.7320493433377973, + "tokens_seen": 793772032 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.005119736306369305, + "objective/train/docs_used": 458050, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5786728858947754, + "objective/train/original_loss": 3.5786728858947754, + "objective/train/theoretical_loss": 3.7316519713794145, + "objective/train/tokens_used": 815084000, + "objective/train/value_avg": -0.0157470703125, + "objective/train/value_loss": 0.003447649534791708, + "objective/train/value_max": -0.001483917236328125, + "objective/train/value_min": -0.304931640625, + "objective/train/value_reward_corr": 0.1616664932568234, + "objective/train/value_std": 0.01448822021484375, + "objective/train/weight_avg": 1.0005289316177368, + "objective/train/weighted_lm_loss": 3.5804762840270996, + "objective/train/weights_max": 1.0180938243865967, + "objective/train/weights_min": 0.9067116975784302, + "theoretical_loss": 3.7316519713794145, + "tokens_seen": 794624000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007669341894060995, + "loss": 1.7191, + "theoretical_loss": 3.731560347598646, + "tokens_seen": 794820608 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007666131621187801, + "loss": 1.713, + "theoretical_loss": 3.7310721769078636, + "tokens_seen": 795869184 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.003911398816853762, + "objective/train/docs_used": 459137, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3340442180633545, + "objective/train/original_loss": 3.3340442180633545, + "objective/train/theoretical_loss": 3.7308893251008413, + "objective/train/tokens_used": 816722400, + "objective/train/value_avg": -0.0149078369140625, + "objective/train/value_loss": 0.002465128432959318, + "objective/train/value_max": -0.00106048583984375, + "objective/train/value_min": -0.253662109375, + "objective/train/value_reward_corr": 0.26539904343018705, + "objective/train/value_std": 0.0142364501953125, + "objective/train/weight_avg": 1.0004032850265503, + "objective/train/weighted_lm_loss": 3.3360676765441895, + "objective/train/weights_max": 1.0254511833190918, + "objective/train/weights_min": 0.9351102113723755, + "theoretical_loss": 3.7308893251008413, + "tokens_seen": 796262400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007662921348314607, + "loss": 1.7348, + "theoretical_loss": 3.730584828789132, + "tokens_seen": 796917760 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0073074460960924625, + "objective/train/docs_used": 459607, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.959669351577759, + "objective/train/original_loss": 3.9596688747406006, + "objective/train/theoretical_loss": 3.7301286847975295, + "objective/train/tokens_used": 818360800, + "objective/train/value_avg": -0.043853759765625, + "objective/train/value_loss": 0.008053965866565704, + "objective/train/value_max": -0.0010986328125, + "objective/train/value_min": -0.69091796875, + "objective/train/value_reward_corr": 0.7173357520485716, + "objective/train/value_std": 0.09515380859375, + "objective/train/weight_avg": 1.0007702112197876, + "objective/train/weighted_lm_loss": 3.9638876914978027, + "objective/train/weights_max": 1.0405480861663818, + "objective/train/weights_min": 0.9084053039550781, + "theoretical_loss": 3.7301286847975295, + "tokens_seen": 797900800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007659711075441412, + "loss": 1.7294, + "theoretical_loss": 3.7300983007768105, + "tokens_seen": 797966336 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007656500802568218, + "loss": 1.733, + "theoretical_loss": 3.729612590415876, + "tokens_seen": 799014912 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0036733245942741632, + "objective/train/docs_used": 460885, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9792377948760986, + "objective/train/original_loss": 2.9792380332946777, + "objective/train/theoretical_loss": 3.729370041090373, + "objective/train/tokens_used": 819999200, + "objective/train/value_avg": -0.01617431640625, + "objective/train/value_loss": 0.003533311653882265, + "objective/train/value_max": -0.0010318756103515625, + "objective/train/value_min": -0.42724609375, + "objective/train/value_reward_corr": 0.2400624996206632, + "objective/train/value_std": 0.01540374755859375, + "objective/train/weight_avg": 1.0003846883773804, + "objective/train/weighted_lm_loss": 2.979916572570801, + "objective/train/weights_max": 1.0162385702133179, + "objective/train/weights_min": 0.9105715751647949, + "theoretical_loss": 3.729370041090373, + "tokens_seen": 799539200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007653290529695024, + "loss": 1.7573, + "theoretical_loss": 3.7291276952618655, + "tokens_seen": 800063488 + }, + { + "epoch": 0.24, + "learning_rate": 0.000765008025682183, + "loss": 1.7512, + "theoretical_loss": 3.7286436128808145, + "tokens_seen": 801112064 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.003485969267785549, + "objective/train/docs_used": 461399, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4483606815338135, + "objective/train/original_loss": 3.4483604431152344, + "objective/train/theoretical_loss": 3.7286133846632232, + "objective/train/tokens_used": 821637600, + "objective/train/value_avg": -0.022613525390625, + "objective/train/value_loss": 0.003918813541531563, + "objective/train/value_max": -0.00154876708984375, + "objective/train/value_min": -0.376220703125, + "objective/train/value_reward_corr": 0.23963301963746128, + "objective/train/value_std": 0.02288818359375, + "objective/train/weight_avg": 1.0003679990768433, + "objective/train/weighted_lm_loss": 3.450424909591675, + "objective/train/weights_max": 1.0360957384109497, + "objective/train/weights_min": 0.9152738451957703, + "theoretical_loss": 3.7286133846632232, + "tokens_seen": 801177600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007646869983948635, + "loss": 1.7543, + "theoretical_loss": 3.7281603408491995, + "tokens_seen": 802160640 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.011158397421240807, + "objective/train/docs_used": 462084, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3129756450653076, + "objective/train/original_loss": 3.3129756450653076, + "objective/train/theoretical_loss": 3.727858706262338, + "objective/train/tokens_used": 823276000, + "objective/train/value_avg": -0.01617431640625, + "objective/train/value_loss": 0.0007141103269532323, + "objective/train/value_max": -0.0012445449829101562, + "objective/train/value_min": -0.437255859375, + "objective/train/value_reward_corr": 0.1135764496396553, + "objective/train/value_std": 0.01505279541015625, + "objective/train/weight_avg": 1.0011193752288818, + "objective/train/weighted_lm_loss": 3.3165283203125, + "objective/train/weights_max": 1.0358644723892212, + "objective/train/weights_min": 0.9544285535812378, + "theoretical_loss": 3.727858706262338, + "tokens_seen": 802816000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007643659711075441, + "loss": 1.7031, + "theoretical_loss": 3.72767787675388, + "tokens_seen": 803209216 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007640449438202247, + "loss": 1.7516, + "theoretical_loss": 3.727196218192039, + "tokens_seen": 804257792 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.008480999618768692, + "objective/train/docs_used": 463313, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.726829767227173, + "objective/train/original_loss": 3.726830005645752, + "objective/train/theoretical_loss": 3.727105996695837, + "objective/train/tokens_used": 824914400, + "objective/train/value_avg": -0.0130767822265625, + "objective/train/value_loss": 0.0005972831859253347, + "objective/train/value_max": -0.001056671142578125, + "objective/train/value_min": -0.1927490234375, + "objective/train/value_reward_corr": 0.13325357889039477, + "objective/train/value_std": 0.01068878173828125, + "objective/train/weight_avg": 1.000851035118103, + "objective/train/weighted_lm_loss": 3.729835271835327, + "objective/train/weights_max": 1.0193812847137451, + "objective/train/weights_min": 0.9615616202354431, + "theoretical_loss": 3.727105996695837, + "tokens_seen": 804454400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007637239165329052, + "loss": 1.727, + "theoretical_loss": 3.7267153627711256, + "tokens_seen": 805306368 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.014093457721173763, + "objective/train/docs_used": 464591, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.8728785514831543, + "objective/train/original_loss": 3.8728787899017334, + "objective/train/theoretical_loss": 3.7263552468331667, + "objective/train/tokens_used": 826552800, + "objective/train/value_avg": -0.037689208984375, + "objective/train/value_loss": 0.008982114493846893, + "objective/train/value_max": -0.0016107559204101562, + "objective/train/value_min": -0.87451171875, + "objective/train/value_reward_corr": 0.49614981636694344, + "objective/train/value_std": 0.0673828125, + "objective/train/weight_avg": 1.0014537572860718, + "objective/train/weighted_lm_loss": 3.878570079803467, + "objective/train/weights_max": 1.0613123178482056, + "objective/train/weights_min": 0.9071692824363708, + "theoretical_loss": 3.7263552468331667, + "tokens_seen": 806092800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007634028892455859, + "loss": 1.7579, + "theoretical_loss": 3.7262353081088015, + "tokens_seen": 806354944 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007630818619582665, + "loss": 1.7815, + "theoretical_loss": 3.725756051832878, + "tokens_seen": 807403520 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.006380915176123381, + "objective/train/docs_used": 465345, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6795835494995117, + "objective/train/original_loss": 3.6795835494995117, + "objective/train/theoretical_loss": 3.725606447604563, + "objective/train/tokens_used": 828191200, + "objective/train/value_avg": -0.016387939453125, + "objective/train/value_loss": 0.003108340548351407, + "objective/train/value_max": -0.0010728836059570312, + "objective/train/value_min": -0.305908203125, + "objective/train/value_reward_corr": 0.23854324855640102, + "objective/train/value_std": 0.0168609619140625, + "objective/train/weight_avg": 1.0006533861160278, + "objective/train/weighted_lm_loss": 3.681769847869873, + "objective/train/weights_max": 1.0309914350509644, + "objective/train/weights_min": 0.9225464463233948, + "theoretical_loss": 3.725606447604563, + "tokens_seen": 807731200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007627608346709471, + "loss": 1.7428, + "theoretical_loss": 3.7252775915812655, + "tokens_seen": 808452096 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.005537970457226038, + "objective/train/docs_used": 466409, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.667154550552368, + "objective/train/original_loss": 3.667154312133789, + "objective/train/theoretical_loss": 3.724859590000527, + "objective/train/tokens_used": 829829600, + "objective/train/value_avg": -0.030548095703125, + "objective/train/value_loss": 0.010195709764957428, + "objective/train/value_max": -0.001483917236328125, + "objective/train/value_min": -0.67578125, + "objective/train/value_reward_corr": 0.4197682226879943, + "objective/train/value_std": 0.034637451171875, + "objective/train/weight_avg": 1.0006037950515747, + "objective/train/weighted_lm_loss": 3.6692309379577637, + "objective/train/weights_max": 1.0350450277328491, + "objective/train/weights_min": 0.9090749621391296, + "theoretical_loss": 3.724859590000527, + "tokens_seen": 809369600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007624398073836276, + "loss": 1.7573, + "theoretical_loss": 3.724799925001913, + "tokens_seen": 809500672 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007621187800963082, + "loss": 1.738, + "theoretical_loss": 3.7243230497527553, + "tokens_seen": 810549248 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": -0.002860798267647624, + "objective/train/docs_used": 466961, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9901251792907715, + "objective/train/original_loss": 3.9901254177093506, + "objective/train/theoretical_loss": 3.7241146650713013, + "objective/train/tokens_used": 831468000, + "objective/train/value_avg": -0.018951416015625, + "objective/train/value_loss": 0.011143472045660019, + "objective/train/value_max": -0.001239776611328125, + "objective/train/value_min": -0.7353515625, + "objective/train/value_reward_corr": 0.25862551232235964, + "objective/train/value_std": 0.021209716796875, + "objective/train/weight_avg": 0.9997683763504028, + "objective/train/weighted_lm_loss": 3.9884471893310547, + "objective/train/weights_max": 1.0180199146270752, + "objective/train/weights_min": 0.9101419448852539, + "theoretical_loss": 3.7241146650713013, + "tokens_seen": 811008000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007617977528089888, + "loss": 1.7479, + "theoretical_loss": 3.723846963501657, + "tokens_seen": 811597824 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": -0.0012386495945975184, + "objective/train/docs_used": 468484, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3712868690490723, + "objective/train/original_loss": 3.3712871074676514, + "objective/train/theoretical_loss": 3.7233716639263568, + "objective/train/tokens_used": 833106400, + "objective/train/value_avg": -0.021881103515625, + "objective/train/value_loss": 0.005506249610334635, + "objective/train/value_max": -0.0014162063598632812, + "objective/train/value_min": -0.3857421875, + "objective/train/value_reward_corr": 0.29656327432617124, + "objective/train/value_std": 0.022674560546875, + "objective/train/weight_avg": 0.9999032616615295, + "objective/train/weighted_lm_loss": 3.3721725940704346, + "objective/train/weights_max": 1.0248637199401855, + "objective/train/weights_min": 0.9109535813331604, + "theoretical_loss": 3.7233716639263568, + "tokens_seen": 812646400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007614767255216694, + "loss": 1.7483, + "theoretical_loss": 3.7233716639263568, + "tokens_seen": 812646400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007611556982343499, + "loss": 1.7391, + "theoretical_loss": 3.7228971487144147, + "tokens_seen": 813694976 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.014556951820850372, + "objective/train/docs_used": 469177, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0137522220611572, + "objective/train/original_loss": 3.0137524604797363, + "objective/train/theoretical_loss": 3.722630577733881, + "objective/train/tokens_used": 834744800, + "objective/train/value_avg": -0.0203704833984375, + "objective/train/value_loss": 0.0008035158389247954, + "objective/train/value_max": -0.0009775161743164062, + "objective/train/value_min": -0.322509765625, + "objective/train/value_reward_corr": 0.44711881708733325, + "objective/train/value_std": 0.027008056640625, + "objective/train/weight_avg": 1.0014597177505493, + "objective/train/weighted_lm_loss": 3.018789052963257, + "objective/train/weights_max": 1.0271450281143188, + "objective/train/weights_min": 0.9836347103118896, + "theoretical_loss": 3.722630577733881, + "tokens_seen": 814284800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007608346709470305, + "loss": 1.7507, + "theoretical_loss": 3.722423415563156, + "tokens_seen": 814743552 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007605136436597111, + "loss": 1.7505, + "theoretical_loss": 3.7219504621796187, + "tokens_seen": 815792128 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.006051173433661461, + "objective/train/docs_used": 469961, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.90824031829834, + "objective/train/original_loss": 3.90824031829834, + "objective/train/theoretical_loss": 3.7218913977202703, + "objective/train/tokens_used": 836383200, + "objective/train/value_avg": -0.02618408203125, + "objective/train/value_loss": 0.0040171160362660885, + "objective/train/value_max": -0.0012693405151367188, + "objective/train/value_min": -0.380859375, + "objective/train/value_reward_corr": 0.6837160873636965, + "objective/train/value_std": 0.045745849609375, + "objective/train/weight_avg": 1.0006248950958252, + "objective/train/weighted_lm_loss": 3.910517692565918, + "objective/train/weights_max": 1.0265636444091797, + "objective/train/weights_min": 0.9213883280754089, + "theoretical_loss": 3.7218913977202703, + "tokens_seen": 815923200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007601926163723916, + "loss": 1.7454, + "theoretical_loss": 3.7214782862805, + "tokens_seen": 816840704 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": -0.014850840903818607, + "objective/train/docs_used": 471203, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.182927370071411, + "objective/train/original_loss": 3.182927131652832, + "objective/train/theoretical_loss": 3.721154115169637, + "objective/train/tokens_used": 838021600, + "objective/train/value_avg": -0.02178955078125, + "objective/train/value_loss": 0.0057106949388980865, + "objective/train/value_max": -0.00147247314453125, + "objective/train/value_min": -0.53662109375, + "objective/train/value_reward_corr": 0.31996985850033566, + "objective/train/value_std": 0.0197601318359375, + "objective/train/weight_avg": 0.9985431432723999, + "objective/train/weighted_lm_loss": 3.1815638542175293, + "objective/train/weights_max": 1.0249050855636597, + "objective/train/weights_min": 0.910016655921936, + "theoretical_loss": 3.721154115169637, + "tokens_seen": 817561600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007598715890850722, + "loss": 1.7573, + "theoretical_loss": 3.7210068855921024, + "tokens_seen": 817889280 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007595505617977529, + "loss": 1.7362, + "theoretical_loss": 3.7205362578502834, + "tokens_seen": 818937856 + }, + { + "debugging/Self-BLEU-5": 0.49160234205869136, + "debugging/distinct-1-grams": 0.8070841648877926, + "debugging/distinct-2-grams": 0.9681388503540196, + "debugging/entropy-1-grams": 6.02777455899678, + "debugging/entropy-2-grams": 6.929977811593813, + "debugging/length": 501.2142857142857, + "debugging/num_segments": 14, + "debugging/raw_token_scores_avg": 0.00842924602329731, + "debugging/raw_token_scores_std": 0.02813110128045082, + "epoch": 0.25, + "objective/train/advantage_avg": 0.006886274088174105, + "objective/train/docs_used": 471797, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4740428924560547, + "objective/train/original_loss": 3.4740426540374756, + "objective/train/theoretical_loss": 3.7204187214233073, + "objective/train/tokens_used": 839660000, + "objective/train/value_avg": -0.01531982421875, + "objective/train/value_loss": 0.0008076109224930406, + "objective/train/value_max": -0.0009889602661132812, + "objective/train/value_min": -0.3408203125, + "objective/train/value_reward_corr": 0.30125439048906855, + "objective/train/value_std": 0.01480865478515625, + "objective/train/weight_avg": 1.0006927251815796, + "objective/train/weighted_lm_loss": 3.4759037494659424, + "objective/train/weights_max": 1.0256562232971191, + "objective/train/weights_min": 0.9662600755691528, + "theoretical_loss": 3.7204187214233073, + "tokens_seen": 819200000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007592295345104335, + "loss": 1.7349, + "theoretical_loss": 3.7200664008004, + "tokens_seen": 819986432 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": -0.01584538444876671, + "objective/train/docs_used": 473281, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.350033760070801, + "objective/train/original_loss": 3.350033760070801, + "objective/train/theoretical_loss": 3.7196852078793388, + "objective/train/tokens_used": 841298400, + "objective/train/value_avg": -0.0196533203125, + "objective/train/value_loss": 0.021186206489801407, + "objective/train/value_max": -0.001255035400390625, + "objective/train/value_min": -0.90185546875, + "objective/train/value_reward_corr": 0.5696302119663464, + "objective/train/value_std": 0.0396728515625, + "objective/train/weight_avg": 0.9985188245773315, + "objective/train/weighted_lm_loss": 3.3403565883636475, + "objective/train/weights_max": 1.0542761087417603, + "objective/train/weights_min": 0.9075272083282471, + "theoretical_loss": 3.7196852078793388, + "tokens_seen": 820838400 + }, + { + "epoch": 0.25, + "learning_rate": 0.000758908507223114, + "loss": 1.6912, + "theoretical_loss": 3.7195973121972585, + "tokens_seen": 821035008 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007585874799357946, + "loss": 1.7109, + "theoretical_loss": 3.7191289898050632, + "tokens_seen": 822083584 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0006311347824521363, + "objective/train/docs_used": 473825, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3650314807891846, + "objective/train/original_loss": 3.3650314807891846, + "objective/train/theoretical_loss": 3.718953565992031, + "objective/train/tokens_used": 842936800, + "objective/train/value_avg": -0.0305023193359375, + "objective/train/value_loss": 0.006791841238737106, + "objective/train/value_max": -0.00106048583984375, + "objective/train/value_min": -0.68896484375, + "objective/train/value_reward_corr": 0.5491891757468822, + "objective/train/value_std": 0.048614501953125, + "objective/train/weight_avg": 1.0000966787338257, + "objective/train/weighted_lm_loss": 3.364429235458374, + "objective/train/weights_max": 1.041347861289978, + "objective/train/weights_min": 0.9261608123779297, + "theoretical_loss": 3.718953565992031, + "tokens_seen": 822476800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007582664526484752, + "loss": 1.7274, + "theoretical_loss": 3.7186614313973645, + "tokens_seen": 823132160 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.006977071054279804, + "objective/train/docs_used": 475209, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.893580198287964, + "objective/train/original_loss": 3.8935799598693848, + "objective/train/theoretical_loss": 3.718223787271449, + "objective/train/tokens_used": 844575200, + "objective/train/value_avg": -0.04571533203125, + "objective/train/value_loss": 0.018220975995063782, + "objective/train/value_max": -0.0015668869018554688, + "objective/train/value_min": -0.85205078125, + "objective/train/value_reward_corr": 0.393317992148713, + "objective/train/value_std": 0.0711669921875, + "objective/train/weight_avg": 1.0007872581481934, + "objective/train/weighted_lm_loss": 3.8953707218170166, + "objective/train/weights_max": 1.0535527467727661, + "objective/train/weights_min": 0.9086506962776184, + "theoretical_loss": 3.718223787271449, + "tokens_seen": 824115200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007579454253611557, + "loss": 1.7528, + "theoretical_loss": 3.7181946347570074, + "tokens_seen": 824180736 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007576243980738363, + "loss": 1.7057, + "theoretical_loss": 3.7177285976760834, + "tokens_seen": 825229312 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0031148905400186777, + "objective/train/docs_used": 475848, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0648369789123535, + "objective/train/original_loss": 3.0648374557495117, + "objective/train/theoretical_loss": 3.7174958632829522, + "objective/train/tokens_used": 846213600, + "objective/train/value_avg": -0.0201263427734375, + "objective/train/value_loss": 0.0066948216408491135, + "objective/train/value_max": -0.0008797645568847656, + "objective/train/value_min": -0.67724609375, + "objective/train/value_reward_corr": 0.3671655013963923, + "objective/train/value_std": 0.0269622802734375, + "objective/train/weight_avg": 1.0003442764282227, + "objective/train/weighted_lm_loss": 3.0653254985809326, + "objective/train/weights_max": 1.0364872217178345, + "objective/train/weights_min": 0.907203197479248, + "theoretical_loss": 3.7174958632829522, + "tokens_seen": 825753600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007573033707865169, + "loss": 1.752, + "theoretical_loss": 3.7172633179558763, + "tokens_seen": 826277888 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007569823434991975, + "loss": 1.746, + "theoretical_loss": 3.7167987934068156, + "tokens_seen": 827326464 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.008471048437058926, + "objective/train/docs_used": 477214, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.772156000137329, + "objective/train/original_loss": 3.7721569538116455, + "objective/train/theoretical_loss": 3.7167697856467186, + "objective/train/tokens_used": 847852000, + "objective/train/value_avg": -0.0251922607421875, + "objective/train/value_loss": 0.005669601261615753, + "objective/train/value_max": -0.0012493133544921875, + "objective/train/value_min": -0.5859375, + "objective/train/value_reward_corr": 0.3087012255670072, + "objective/train/value_std": 0.02972412109375, + "objective/train/weight_avg": 1.0008749961853027, + "objective/train/weighted_lm_loss": 3.7753219604492188, + "objective/train/weights_max": 1.043074369430542, + "objective/train/weights_min": 0.9086236953735352, + "theoretical_loss": 3.7167697856467186, + "tokens_seen": 827392000 + }, + { + "epoch": 0.25, + "learning_rate": 0.000756661316211878, + "loss": 1.7328, + "theoretical_loss": 3.7163350218484252, + "tokens_seen": 828375040 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.007294286508113146, + "objective/train/docs_used": 478014, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5869016647338867, + "objective/train/original_loss": 3.586902379989624, + "objective/train/theoretical_loss": 3.716045546037287, + "objective/train/tokens_used": 849490400, + "objective/train/value_avg": -0.06573486328125, + "objective/train/value_loss": 0.009683546610176563, + "objective/train/value_max": -0.0011739730834960938, + "objective/train/value_min": -0.9716796875, + "objective/train/value_reward_corr": 0.8484143280369967, + "objective/train/value_std": 0.1773681640625, + "objective/train/weight_avg": 1.0007768869400024, + "objective/train/weighted_lm_loss": 3.5873465538024902, + "objective/train/weights_max": 1.0326405763626099, + "objective/train/weights_min": 0.9103919267654419, + "theoretical_loss": 3.716045546037287, + "tokens_seen": 829030400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007563402889245586, + "loss": 1.7166, + "theoretical_loss": 3.7158720011092767, + "tokens_seen": 829423616 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007560192616372392, + "loss": 1.7059, + "theoretical_loss": 3.715409729026936, + "tokens_seen": 830472192 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": -0.0005511897034011781, + "objective/train/docs_used": 479315, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.710723876953125, + "objective/train/original_loss": 3.710724115371704, + "objective/train/theoretical_loss": 3.7153231361830956, + "objective/train/tokens_used": 851128800, + "objective/train/value_avg": -0.02777099609375, + "objective/train/value_loss": 0.012873864732682705, + "objective/train/value_max": -0.0012598037719726562, + "objective/train/value_min": -0.939453125, + "objective/train/value_reward_corr": 0.4140399657511822, + "objective/train/value_std": 0.043670654296875, + "objective/train/weight_avg": 1.0000081062316895, + "objective/train/weighted_lm_loss": 3.7088210582733154, + "objective/train/weights_max": 1.0579432249069214, + "objective/train/weights_min": 0.9084168672561646, + "theoretical_loss": 3.7153231361830956, + "tokens_seen": 830668800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007556982343499197, + "loss": 1.7148, + "theoretical_loss": 3.714948203447919, + "tokens_seen": 831520768 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.009345785714685917, + "objective/train/docs_used": 479893, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0162105560302734, + "objective/train/original_loss": 3.0162105560302734, + "objective/train/theoretical_loss": 3.7146025478660274, + "objective/train/tokens_used": 852767200, + "objective/train/value_avg": -0.0202178955078125, + "objective/train/value_loss": 0.0033227037638425827, + "objective/train/value_max": -0.00127410888671875, + "objective/train/value_min": -0.446533203125, + "objective/train/value_reward_corr": 0.2835792932461627, + "objective/train/value_std": 0.02740478515625, + "objective/train/weight_avg": 1.0009509325027466, + "objective/train/weighted_lm_loss": 3.0187928676605225, + "objective/train/weights_max": 1.0350995063781738, + "objective/train/weights_min": 0.9167730212211609, + "theoretical_loss": 3.7146025478660274, + "tokens_seen": 832307200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007553772070626004, + "loss": 1.6834, + "theoretical_loss": 3.7144874222276405, + "tokens_seen": 832569344 + }, + { + "epoch": 0.25, + "learning_rate": 0.000755056179775281, + "loss": 1.6922, + "theoretical_loss": 3.714027383230369, + "tokens_seen": 833617920 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.009116093628108501, + "objective/train/docs_used": 480416, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7582342624664307, + "objective/train/original_loss": 3.7582340240478516, + "objective/train/theoretical_loss": 3.7138837729209584, + "objective/train/tokens_used": 854405600, + "objective/train/value_avg": -0.01483154296875, + "objective/train/value_loss": 0.00040206220000982285, + "objective/train/value_max": -0.0011243820190429688, + "objective/train/value_min": -0.1729736328125, + "objective/train/value_reward_corr": 0.40890583755611903, + "objective/train/value_std": 0.01418304443359375, + "objective/train/weight_avg": 1.0009136199951172, + "objective/train/weighted_lm_loss": 3.761627674102783, + "objective/train/weights_max": 1.017242193222046, + "objective/train/weights_min": 0.9784783720970154, + "theoretical_loss": 3.7138837729209584, + "tokens_seen": 833945600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007547351524879616, + "loss": 1.7219, + "theoretical_loss": 3.713568084329175, + "tokens_seen": 834666496 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.007470104843378067, + "objective/train/docs_used": 481156, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4565505981445312, + "objective/train/original_loss": 3.456550359725952, + "objective/train/theoretical_loss": 3.713166803235318, + "objective/train/tokens_used": 856044000, + "objective/train/value_avg": -0.0184173583984375, + "objective/train/value_loss": 0.0034611623268574476, + "objective/train/value_max": -0.00080108642578125, + "objective/train/value_min": -0.30859375, + "objective/train/value_reward_corr": 0.3590465352863523, + "objective/train/value_std": 0.0196380615234375, + "objective/train/weight_avg": 1.000764012336731, + "objective/train/weighted_lm_loss": 3.458671808242798, + "objective/train/weights_max": 1.0213104486465454, + "objective/train/weights_min": 0.920558750629425, + "theoretical_loss": 3.713166803235318, + "tokens_seen": 835584000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007544141252006421, + "loss": 1.6895, + "theoretical_loss": 3.7131095234058895, + "tokens_seen": 835715072 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007540930979133227, + "loss": 1.7206, + "theoretical_loss": 3.712651698351051, + "tokens_seen": 836763648 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.008837671019136906, + "objective/train/docs_used": 482509, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2056589126586914, + "objective/train/original_loss": 3.2056589126586914, + "objective/train/theoretical_loss": 3.7124516307486424, + "objective/train/tokens_used": 857682400, + "objective/train/value_avg": -0.017242431640625, + "objective/train/value_loss": 0.0009282487444579601, + "objective/train/value_max": -0.0012493133544921875, + "objective/train/value_min": -0.330078125, + "objective/train/value_reward_corr": 0.34270907636256065, + "objective/train/value_std": 0.01763916015625, + "objective/train/weight_avg": 1.0008883476257324, + "objective/train/weighted_lm_loss": 3.208303689956665, + "objective/train/weights_max": 1.026044487953186, + "objective/train/weights_min": 0.9376290440559387, + "theoretical_loss": 3.7124516307486424, + "tokens_seen": 837222400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007537720706260033, + "loss": 1.6556, + "theoretical_loss": 3.7121946070638625, + "tokens_seen": 837812224 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.011638335883617401, + "objective/train/docs_used": 483094, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.10224723815918, + "objective/train/original_loss": 4.10224723815918, + "objective/train/theoretical_loss": 3.7117382474521436, + "objective/train/tokens_used": 859320800, + "objective/train/value_avg": -0.0171356201171875, + "objective/train/value_loss": 0.0024308599531650543, + "objective/train/value_max": -0.0011339187622070312, + "objective/train/value_min": -0.289794921875, + "objective/train/value_reward_corr": 0.21282312237072562, + "objective/train/value_std": 0.0193634033203125, + "objective/train/weight_avg": 1.0011757612228394, + "objective/train/weighted_lm_loss": 4.106822967529297, + "objective/train/weights_max": 1.0234427452087402, + "objective/train/weights_min": 0.9138486981391907, + "theoretical_loss": 3.7117382474521436, + "tokens_seen": 838860800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007534510433386838, + "loss": 1.7052, + "theoretical_loss": 3.7117382474521436, + "tokens_seen": 838860800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007531300160513643, + "loss": 1.7373, + "theoretical_loss": 3.7112826174322864, + "tokens_seen": 839909376 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.005088829901069403, + "objective/train/docs_used": 483926, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5363247394561768, + "objective/train/original_loss": 3.536324977874756, + "objective/train/theoretical_loss": 3.711026645388275, + "objective/train/tokens_used": 860959200, + "objective/train/value_avg": -0.0260009765625, + "objective/train/value_loss": 0.007420843932777643, + "objective/train/value_max": -0.0007066726684570312, + "objective/train/value_min": -0.712890625, + "objective/train/value_reward_corr": 0.6740125205366329, + "objective/train/value_std": 0.05389404296875, + "objective/train/weight_avg": 1.0005453824996948, + "objective/train/weighted_lm_loss": 3.539508581161499, + "objective/train/weights_max": 1.040179967880249, + "objective/train/weights_min": 0.9125500321388245, + "theoretical_loss": 3.711026645388275, + "tokens_seen": 840499200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007528089887640449, + "loss": 1.7242, + "theoretical_loss": 3.7108277149292066, + "tokens_seen": 840957952 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007524879614767255, + "loss": 1.7512, + "theoretical_loss": 3.7103735378763014, + "tokens_seen": 842006528 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.008162754587829113, + "objective/train/docs_used": 484283, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5741376876831055, + "objective/train/original_loss": 3.5741374492645264, + "objective/train/theoretical_loss": 3.7103168166503053, + "objective/train/tokens_used": 862597600, + "objective/train/value_avg": -0.02362060546875, + "objective/train/value_loss": 0.0041664037853479385, + "objective/train/value_max": -0.0012788772583007812, + "objective/train/value_min": -0.525390625, + "objective/train/value_reward_corr": 0.5682993457000869, + "objective/train/value_std": 0.038665771484375, + "objective/train/weight_avg": 1.000836968421936, + "objective/train/weighted_lm_loss": 3.5771594047546387, + "objective/train/weights_max": 1.0304319858551025, + "objective/train/weights_min": 0.9183948636054993, + "theoretical_loss": 3.7103168166503053, + "tokens_seen": 842137600 + }, + { + "epoch": 0.26, + "learning_rate": 0.000752166934189406, + "loss": 1.7656, + "theoretical_loss": 3.7099200842154003, + "tokens_seen": 843055104 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.008296966552734375, + "objective/train/docs_used": 485567, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1618123054504395, + "objective/train/original_loss": 3.1618120670318604, + "objective/train/theoretical_loss": 3.7096087533818953, + "objective/train/tokens_used": 864236000, + "objective/train/value_avg": -0.0165557861328125, + "objective/train/value_loss": 0.004001497756689787, + "objective/train/value_max": -0.0017414093017578125, + "objective/train/value_min": -0.3271484375, + "objective/train/value_reward_corr": 0.09802381245760701, + "objective/train/value_std": 0.01554107666015625, + "objective/train/weight_avg": 1.0008492469787598, + "objective/train/weighted_lm_loss": 3.163949966430664, + "objective/train/weights_max": 1.032689094543457, + "objective/train/weights_min": 0.907951831817627, + "theoretical_loss": 3.7096087533818953, + "tokens_seen": 843776000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007518459069020866, + "loss": 1.7323, + "theoretical_loss": 3.709467351896726, + "tokens_seen": 844103680 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007515248796147673, + "loss": 1.7, + "theoretical_loss": 3.709015338878843, + "tokens_seen": 845152256 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.006563300266861916, + "objective/train/docs_used": 486205, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5831449031829834, + "objective/train/original_loss": 3.5831451416015625, + "objective/train/theoretical_loss": 3.7089024477766817, + "objective/train/tokens_used": 865874400, + "objective/train/value_avg": -0.0229034423828125, + "objective/train/value_loss": 0.003069969592615962, + "objective/train/value_max": -0.0011920928955078125, + "objective/train/value_min": -0.435302734375, + "objective/train/value_reward_corr": 0.47788579058807834, + "objective/train/value_std": 0.02996826171875, + "objective/train/weight_avg": 1.0006715059280396, + "objective/train/weighted_lm_loss": 3.58540678024292, + "objective/train/weights_max": 1.0432289838790894, + "objective/train/weights_min": 0.9311052560806274, + "theoretical_loss": 3.7089024477766817, + "tokens_seen": 845414400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007512038523274479, + "loss": 1.7223, + "theoretical_loss": 3.7085640431286198, + "tokens_seen": 846200832 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": -0.00424844678491354, + "objective/train/docs_used": 487507, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 4.001058101654053, + "objective/train/original_loss": 4.001058578491211, + "objective/train/theoretical_loss": 3.7081978920778607, + "objective/train/tokens_used": 867512800, + "objective/train/value_avg": -0.031036376953125, + "objective/train/value_loss": 0.008147111162543297, + "objective/train/value_max": -0.0011882781982421875, + "objective/train/value_min": -0.461669921875, + "objective/train/value_reward_corr": 0.5414622658452743, + "objective/train/value_std": 0.045745849609375, + "objective/train/weight_avg": 0.9996152520179749, + "objective/train/weighted_lm_loss": 4.00091552734375, + "objective/train/weights_max": 1.047112226486206, + "objective/train/weights_min": 0.9173831939697266, + "theoretical_loss": 3.7081978920778607, + "tokens_seen": 847052800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007508828250401284, + "loss": 1.7351, + "theoretical_loss": 3.7081134626211796, + "tokens_seen": 847249408 + }, + { + "epoch": 0.26, + "learning_rate": 0.000750561797752809, + "loss": 1.7296, + "theoretical_loss": 3.7076635953398607, + "tokens_seen": 848297984 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.00684879207983613, + "objective/train/docs_used": 487917, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.271728992462158, + "objective/train/original_loss": 3.2717292308807373, + "objective/train/theoretical_loss": 3.7074950785777787, + "objective/train/tokens_used": 869151200, + "objective/train/value_avg": -0.017608642578125, + "objective/train/value_loss": 0.003163976827636361, + "objective/train/value_max": -0.0014438629150390625, + "objective/train/value_min": -0.28369140625, + "objective/train/value_reward_corr": 0.20504764947916107, + "objective/train/value_std": 0.0175933837890625, + "objective/train/weight_avg": 1.0007003545761108, + "objective/train/weighted_lm_loss": 3.2739343643188477, + "objective/train/weights_max": 1.0284767150878906, + "objective/train/weights_min": 0.920933723449707, + "theoretical_loss": 3.7074950785777787, + "tokens_seen": 848691200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007502407704654896, + "loss": 1.7328, + "theoretical_loss": 3.7072144392761697, + "tokens_seen": 849346560 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.013067849911749363, + "objective/train/docs_used": 489068, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4604387283325195, + "objective/train/original_loss": 3.4604389667510986, + "objective/train/theoretical_loss": 3.706793999617529, + "objective/train/tokens_used": 870789600, + "objective/train/value_avg": -0.031707763671875, + "objective/train/value_loss": 0.004168206360191107, + "objective/train/value_max": -0.0010528564453125, + "objective/train/value_min": -0.396728515625, + "objective/train/value_reward_corr": 0.46273780209434623, + "objective/train/value_std": 0.043426513671875, + "objective/train/weight_avg": 1.001327395439148, + "objective/train/weighted_lm_loss": 3.465242862701416, + "objective/train/weights_max": 1.0376992225646973, + "objective/train/weights_min": 0.9202264547348022, + "theoretical_loss": 3.706793999617529, + "tokens_seen": 850329600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007499197431781701, + "loss": 1.7196, + "theoretical_loss": 3.70676599242974, + "tokens_seen": 850395136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007495987158908507, + "loss": 1.73, + "theoretical_loss": 3.706318252808291, + "tokens_seen": 851443712 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": -0.0006452878005802631, + "objective/train/docs_used": 489617, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5713086128234863, + "objective/train/original_loss": 3.5713088512420654, + "objective/train/theoretical_loss": 3.7060946475865455, + "objective/train/tokens_used": 872428000, + "objective/train/value_avg": -0.022796630859375, + "objective/train/value_loss": 0.007265492808073759, + "objective/train/value_max": -0.0010814666748046875, + "objective/train/value_min": -0.51513671875, + "objective/train/value_reward_corr": 0.3129265953261318, + "objective/train/value_std": 0.022186279296875, + "objective/train/weight_avg": 0.9999712705612183, + "objective/train/weighted_lm_loss": 3.5722944736480713, + "objective/train/weights_max": 1.0196866989135742, + "objective/train/weights_min": 0.9128497838973999, + "theoretical_loss": 3.7060946475865455, + "tokens_seen": 851968000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007492776886035313, + "loss": 1.7146, + "theoretical_loss": 3.705871218427581, + "tokens_seen": 852492288 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007489566613162119, + "loss": 1.7039, + "theoretical_loss": 3.705424887311368, + "tokens_seen": 853540864 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": -0.0009023576276376843, + "objective/train/docs_used": 491051, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.497465133666992, + "objective/train/original_loss": 3.497465133666992, + "objective/train/theoretical_loss": 3.705397014922211, + "objective/train/tokens_used": 874066400, + "objective/train/value_avg": -0.01544952392578125, + "objective/train/value_loss": 0.00328363967128098, + "objective/train/value_max": -0.0013408660888671875, + "objective/train/value_min": -0.276123046875, + "objective/train/value_reward_corr": 0.10848740767926884, + "objective/train/value_std": 0.0182037353515625, + "objective/train/weight_avg": 0.9999260306358337, + "objective/train/weighted_lm_loss": 3.499988079071045, + "objective/train/weights_max": 1.026231288909912, + "objective/train/weights_min": 0.9174668788909912, + "theoretical_loss": 3.705397014922211, + "tokens_seen": 853606400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007486356340288924, + "loss": 1.7302, + "theoretical_loss": 3.704979257491368, + "tokens_seen": 854589440 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0038026131223887205, + "objective/train/docs_used": 491675, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.16001033782959, + "objective/train/original_loss": 3.160010576248169, + "objective/train/theoretical_loss": 3.70470109410946, + "objective/train/tokens_used": 875704800, + "objective/train/value_avg": -0.03515625, + "objective/train/value_loss": 0.01113983802497387, + "objective/train/value_max": -0.0009255409240722656, + "objective/train/value_min": -0.7431640625, + "objective/train/value_reward_corr": 0.3673317956048083, + "objective/train/value_std": 0.0438232421875, + "objective/train/weight_avg": 1.0004351139068604, + "objective/train/weighted_lm_loss": 3.1611132621765137, + "objective/train/weights_max": 1.0351911783218384, + "objective/train/weights_min": 0.9091296195983887, + "theoretical_loss": 3.70470109410946, + "tokens_seen": 855244800 + }, + { + "epoch": 0.26, + "learning_rate": 0.000748314606741573, + "loss": 1.7623, + "theoretical_loss": 3.704534327007211, + "tokens_seen": 855638016 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007479935794542536, + "loss": 1.7098, + "theoretical_loss": 3.7040900939064008, + "tokens_seen": 856686592 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.008446824736893177, + "objective/train/docs_used": 492132, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.607121229171753, + "objective/train/original_loss": 3.607121706008911, + "objective/train/theoretical_loss": 3.7040068776803894, + "objective/train/tokens_used": 877343200, + "objective/train/value_avg": -0.0222320556640625, + "objective/train/value_loss": 0.00668382691219449, + "objective/train/value_max": -0.0011472702026367188, + "objective/train/value_min": -0.482421875, + "objective/train/value_reward_corr": 0.26064958105804986, + "objective/train/value_std": 0.0287933349609375, + "objective/train/weight_avg": 1.0008774995803833, + "objective/train/weighted_lm_loss": 3.610283374786377, + "objective/train/weights_max": 1.030609369277954, + "objective/train/weights_min": 0.9121820330619812, + "theoretical_loss": 3.7040068776803894, + "tokens_seen": 856883200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007476725521669341, + "loss": 1.7352, + "theoretical_loss": 3.7036465562442746, + "tokens_seen": 857735168 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.00576441828161478, + "objective/train/docs_used": 493247, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.48283052444458, + "objective/train/original_loss": 3.48283052444458, + "objective/train/theoretical_loss": 3.7033143582138752, + "objective/train/tokens_used": 878981600, + "objective/train/value_avg": -0.0126190185546875, + "objective/train/value_loss": 0.0018484744941815734, + "objective/train/value_max": -0.0007700920104980469, + "objective/train/value_min": -0.6376953125, + "objective/train/value_reward_corr": 0.46262401610479187, + "objective/train/value_std": 0.01534271240234375, + "objective/train/weight_avg": 1.0005855560302734, + "objective/train/weighted_lm_loss": 3.4842069149017334, + "objective/train/weights_max": 1.0215047597885132, + "objective/train/weights_min": 0.9118712544441223, + "theoretical_loss": 3.7033143582138752, + "tokens_seen": 858521600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007473515248796148, + "loss": 1.7579, + "theoretical_loss": 3.703203712083961, + "tokens_seen": 858783744 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007470304975922954, + "loss": 1.7381, + "theoretical_loss": 3.702761559496338, + "tokens_seen": 859832320 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.009475963190197945, + "objective/train/docs_used": 493898, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4138476848602295, + "objective/train/original_loss": 3.4138479232788086, + "objective/train/theoretical_loss": 3.70262352833519, + "objective/train/tokens_used": 880620000, + "objective/train/value_avg": -0.0150604248046875, + "objective/train/value_loss": 0.0018525092164054513, + "objective/train/value_max": -0.0011034011840820312, + "objective/train/value_min": -0.24560546875, + "objective/train/value_reward_corr": 0.11225885407645358, + "objective/train/value_std": 0.01404571533203125, + "objective/train/weight_avg": 1.000956654548645, + "objective/train/weighted_lm_loss": 3.4171526432037354, + "objective/train/weights_max": 1.024002194404602, + "objective/train/weights_min": 0.9107770323753357, + "theoretical_loss": 3.70262352833519, + "tokens_seen": 860160000 + }, + { + "epoch": 0.26, + "learning_rate": 0.000746709470304976, + "loss": 1.7457, + "theoretical_loss": 3.7023200965599967, + "tokens_seen": 860880896 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.009265927597880363, + "objective/train/docs_used": 495406, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.423795700073242, + "objective/train/original_loss": 3.4237959384918213, + "objective/train/theoretical_loss": 3.701934380715622, + "objective/train/tokens_used": 882258400, + "objective/train/value_avg": -0.03125, + "objective/train/value_loss": 0.004572381731122732, + "objective/train/value_max": -0.001483917236328125, + "objective/train/value_min": -0.7265625, + "objective/train/value_reward_corr": 0.6496874464241936, + "objective/train/value_std": 0.0498046875, + "objective/train/weight_avg": 1.0009492635726929, + "objective/train/weighted_lm_loss": 3.427004337310791, + "objective/train/weights_max": 1.0358208417892456, + "objective/train/weights_min": 0.9106564521789551, + "theoretical_loss": 3.701934380715622, + "tokens_seen": 861798400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007463884430176565, + "loss": 1.7292, + "theoretical_loss": 3.7018793213611954, + "tokens_seen": 861929472 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007460674157303371, + "loss": 1.7373, + "theoretical_loss": 3.7014392319938265, + "tokens_seen": 862978048 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0010764370672404766, + "objective/train/docs_used": 496136, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.450728416442871, + "objective/train/original_loss": 3.4507291316986084, + "objective/train/theoretical_loss": 3.7012469080721084, + "objective/train/tokens_used": 883896800, + "objective/train/value_avg": -0.029022216796875, + "objective/train/value_loss": 0.008429894223809242, + "objective/train/value_max": -0.0009546279907226562, + "objective/train/value_min": -0.8994140625, + "objective/train/value_reward_corr": 0.5480327239847823, + "objective/train/value_std": 0.0467529296875, + "objective/train/weight_avg": 1.000149130821228, + "objective/train/weighted_lm_loss": 3.450284004211426, + "objective/train/weights_max": 1.0653489828109741, + "objective/train/weights_min": 0.9113413095474243, + "theoretical_loss": 3.7012469080721084, + "tokens_seen": 863436800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007457463884430177, + "loss": 1.7295, + "theoretical_loss": 3.700999826559369, + "tokens_seen": 864026624 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0025910516269505024, + "objective/train/docs_used": 497267, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.491219997406006, + "objective/train/original_loss": 3.4912195205688477, + "objective/train/theoretical_loss": 3.700561103166857, + "objective/train/tokens_used": 885535200, + "objective/train/value_avg": -0.0311737060546875, + "objective/train/value_loss": 0.008941441774368286, + "objective/train/value_max": -0.0009002685546875, + "objective/train/value_min": -0.66748046875, + "objective/train/value_reward_corr": 0.5071615405360274, + "objective/train/value_std": 0.0589599609375, + "objective/train/weight_avg": 1.000303030014038, + "objective/train/weighted_lm_loss": 3.4912471771240234, + "objective/train/weights_max": 1.040114402770996, + "objective/train/weights_min": 0.9065442085266113, + "theoretical_loss": 3.700561103166857, + "tokens_seen": 865075200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007454253611556983, + "loss": 1.7315, + "theoretical_loss": 3.700561103166857, + "tokens_seen": 865075200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007451043338683788, + "loss": 1.7657, + "theoretical_loss": 3.7001230599328334, + "tokens_seen": 866123776 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": -0.03113710694015026, + "objective/train/docs_used": 497986, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.445004940032959, + "objective/train/original_loss": 3.445005416870117, + "objective/train/theoretical_loss": 3.6998769588069838, + "objective/train/tokens_used": 887173600, + "objective/train/value_avg": -0.0260772705078125, + "objective/train/value_loss": 0.033920567482709885, + "objective/train/value_max": -0.001255035400390625, + "objective/train/value_min": -0.54150390625, + "objective/train/value_reward_corr": 0.3026756038091616, + "objective/train/value_std": 0.0272216796875, + "objective/train/weight_avg": 0.9970515370368958, + "objective/train/weighted_lm_loss": 3.4388821125030518, + "objective/train/weights_max": 1.0249840021133423, + "objective/train/weights_min": 0.9060051441192627, + "theoretical_loss": 3.6998769588069838, + "tokens_seen": 866713600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007447833065810594, + "loss": 1.7342, + "theoretical_loss": 3.6996856949813184, + "tokens_seen": 867172352 + }, + { + "epoch": 0.26, + "learning_rate": 0.00074446227929374, + "loss": 1.7245, + "theoretical_loss": 3.6992490064437624, + "tokens_seen": 868220928 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.002971676643937826, + "objective/train/docs_used": 499247, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1106514930725098, + "objective/train/original_loss": 3.110651731491089, + "objective/train/theoretical_loss": 3.6991944678441504, + "objective/train/tokens_used": 888812000, + "objective/train/value_avg": -0.0184173583984375, + "objective/train/value_loss": 0.0055656093172729015, + "objective/train/value_max": -0.0010442733764648438, + "objective/train/value_min": -0.61767578125, + "objective/train/value_reward_corr": 0.3980564886112472, + "objective/train/value_std": 0.024383544921875, + "objective/train/weight_avg": 1.0003243684768677, + "objective/train/weighted_lm_loss": 3.1124062538146973, + "objective/train/weights_max": 1.0246340036392212, + "objective/train/weights_min": 0.9159946441650391, + "theoretical_loss": 3.6991944678441504, + "tokens_seen": 868352000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007441412520064205, + "loss": 1.7192, + "theoretical_loss": 3.6988129924590156, + "tokens_seen": 869269504 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.011536385864019394, + "objective/train/docs_used": 499921, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.362466335296631, + "objective/train/original_loss": 3.36246657371521, + "objective/train/theoretical_loss": 3.6985136231742035, + "objective/train/tokens_used": 890450400, + "objective/train/value_avg": -0.0161895751953125, + "objective/train/value_loss": 0.000782955321483314, + "objective/train/value_max": -0.000720977783203125, + "objective/train/value_min": -0.26953125, + "objective/train/value_reward_corr": 0.12068397397280724, + "objective/train/value_std": 0.016448974609375, + "objective/train/weight_avg": 1.001157522201538, + "objective/train/weighted_lm_loss": 3.3660099506378174, + "objective/train/weights_max": 1.0271213054656982, + "objective/train/weights_min": 0.9717966318130493, + "theoretical_loss": 3.6985136231742035, + "tokens_seen": 869990400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007438202247191011, + "loss": 1.7133, + "theoretical_loss": 3.698377651173285, + "tokens_seen": 870318080 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007434991974317818, + "loss": 1.7263, + "theoretical_loss": 3.6979429807400965, + "tokens_seen": 871366656 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.008591736666858196, + "objective/train/docs_used": 501092, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0590295791625977, + "objective/train/original_loss": 3.0590295791625977, + "objective/train/theoretical_loss": 3.69783441773682, + "objective/train/tokens_used": 892088800, + "objective/train/value_avg": -0.0274505615234375, + "objective/train/value_loss": 0.00764114735648036, + "objective/train/value_max": -0.001239776611328125, + "objective/train/value_min": -0.8486328125, + "objective/train/value_reward_corr": 0.23019838039104254, + "objective/train/value_std": 0.028594970703125, + "objective/train/weight_avg": 1.000896692276001, + "objective/train/weighted_lm_loss": 3.0627682209014893, + "objective/train/weights_max": 1.0249210596084595, + "objective/train/weights_min": 0.911127507686615, + "theoretical_loss": 3.69783441773682, + "tokens_seen": 871628800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007431781701444624, + "loss": 1.7131, + "theoretical_loss": 3.6975089793202613, + "tokens_seen": 872415232 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0051925466395914555, + "objective/train/docs_used": 501627, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.310394525527954, + "objective/train/original_loss": 3.310394287109375, + "objective/train/theoretical_loss": 3.6971568445151535, + "objective/train/tokens_used": 893727200, + "objective/train/value_avg": -0.0178985595703125, + "objective/train/value_loss": 0.0030840449035167694, + "objective/train/value_max": -0.0011072158813476562, + "objective/train/value_min": -0.44189453125, + "objective/train/value_reward_corr": 0.4244540282891391, + "objective/train/value_std": 0.0206451416015625, + "objective/train/weight_avg": 1.0005344152450562, + "objective/train/weighted_lm_loss": 3.3124639987945557, + "objective/train/weights_max": 1.0260467529296875, + "objective/train/weights_min": 0.9064141511917114, + "theoretical_loss": 3.6971568445151535, + "tokens_seen": 873267200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007428571428571429, + "loss": 1.721, + "theoretical_loss": 3.697075645081833, + "tokens_seen": 873463808 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007425361155698235, + "loss": 1.735, + "theoretical_loss": 3.6966429762000756, + "tokens_seen": 874512384 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.012385562993586063, + "objective/train/docs_used": 502963, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.349586248397827, + "objective/train/original_loss": 3.349586009979248, + "objective/train/theoretical_loss": 3.696480896535487, + "objective/train/tokens_used": 895365600, + "objective/train/value_avg": -0.0161285400390625, + "objective/train/value_loss": 0.000577146711293608, + "objective/train/value_max": -0.0011243820190429688, + "objective/train/value_min": -0.282958984375, + "objective/train/value_reward_corr": 0.33606904340615634, + "objective/train/value_std": 0.0187835693359375, + "objective/train/weight_avg": 1.0012414455413818, + "objective/train/weighted_lm_loss": 3.3537087440490723, + "objective/train/weights_max": 1.0253978967666626, + "objective/train/weights_min": 0.9777106046676636, + "theoretical_loss": 3.696480896535487, + "tokens_seen": 874905600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007422150882825041, + "loss": 1.7166, + "theoretical_loss": 3.696210970857422, + "tokens_seen": 875560960 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.01223513763397932, + "objective/train/docs_used": 503455, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.756373167037964, + "objective/train/original_loss": 3.7563729286193848, + "objective/train/theoretical_loss": 3.695806566866887, + "objective/train/tokens_used": 897004000, + "objective/train/value_avg": -0.0203399658203125, + "objective/train/value_loss": 0.001855852548032999, + "objective/train/value_max": -0.0010442733764648438, + "objective/train/value_min": -0.421875, + "objective/train/value_reward_corr": 0.3954064831324931, + "objective/train/value_std": 0.0250701904296875, + "objective/train/weight_avg": 1.0012327432632446, + "objective/train/weighted_lm_loss": 3.7608394622802734, + "objective/train/weights_max": 1.036293864250183, + "objective/train/weights_min": 0.9157010316848755, + "theoretical_loss": 3.695806566866887, + "tokens_seen": 876544000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007418940609951846, + "loss": 1.7265, + "theoretical_loss": 3.695779627243439, + "tokens_seen": 876609536 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007415730337078652, + "loss": 1.7294, + "theoretical_loss": 3.695348943554793, + "tokens_seen": 877658112 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.006806990597397089, + "objective/train/docs_used": 504719, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.967519998550415, + "objective/train/original_loss": 2.967519521713257, + "objective/train/theoretical_loss": 3.695133848620862, + "objective/train/tokens_used": 898642400, + "objective/train/value_avg": -0.017333984375, + "objective/train/value_loss": 0.0017174782697111368, + "objective/train/value_max": -0.0009508132934570312, + "objective/train/value_min": -0.317138671875, + "objective/train/value_reward_corr": 0.14345230277495838, + "objective/train/value_std": 0.0160064697265625, + "objective/train/weight_avg": 1.000689148902893, + "objective/train/weighted_lm_loss": 2.9695005416870117, + "objective/train/weights_max": 1.03213632106781, + "objective/train/weights_min": 0.9318362474441528, + "theoretical_loss": 3.695133848620862, + "tokens_seen": 878182400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007412520064205458, + "loss": 1.7114, + "theoretical_loss": 3.6949189179952113, + "tokens_seen": 878706688 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007409309791332264, + "loss": 1.7474, + "theoretical_loss": 3.6944895487754454, + "tokens_seen": 879755264 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.0041221934370696545, + "objective/train/docs_used": 505361, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1063263416290283, + "objective/train/original_loss": 3.1063265800476074, + "objective/train/theoretical_loss": 3.694462734951022, + "objective/train/tokens_used": 900280800, + "objective/train/value_avg": -0.01549530029296875, + "objective/train/value_loss": 0.0027869348414242268, + "objective/train/value_max": -0.0010080337524414062, + "objective/train/value_min": -0.406494140625, + "objective/train/value_reward_corr": 0.6386404107192307, + "objective/train/value_std": 0.020660400390625, + "objective/train/weight_avg": 0.9996015429496765, + "objective/train/weighted_lm_loss": 3.105362892150879, + "objective/train/weights_max": 1.0186715126037598, + "objective/train/weights_min": 0.9426085352897644, + "theoretical_loss": 3.694462734951022, + "tokens_seen": 879820800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007406099518459069, + "loss": 1.7275, + "theoretical_loss": 3.6940608341132375, + "tokens_seen": 880803840 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.013944916427135468, + "objective/train/docs_used": 505856, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.612179756164551, + "objective/train/original_loss": 3.612179756164551, + "objective/train/theoretical_loss": 3.693793219052748, + "objective/train/tokens_used": 901919200, + "objective/train/value_avg": -0.0257110595703125, + "objective/train/value_loss": 0.004598401021212339, + "objective/train/value_max": -0.001201629638671875, + "objective/train/value_min": -0.55517578125, + "objective/train/value_reward_corr": 0.34911867203251873, + "objective/train/value_std": 0.035736083984375, + "objective/train/weight_avg": 1.0014172792434692, + "objective/train/weighted_lm_loss": 3.616940498352051, + "objective/train/weights_max": 1.0482829809188843, + "objective/train/weights_min": 0.9096745252609253, + "theoretical_loss": 3.693793219052748, + "tokens_seen": 881459200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007402889245585875, + "loss": 1.7265, + "theoretical_loss": 3.693632772233284, + "tokens_seen": 881852416 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007399678972712681, + "loss": 1.7214, + "theoretical_loss": 3.6932053613671982, + "tokens_seen": 882900992 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.00490741478279233, + "objective/train/docs_used": 506939, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2767350673675537, + "objective/train/original_loss": 3.276735782623291, + "objective/train/theoretical_loss": 3.6931252941628547, + "objective/train/tokens_used": 903557600, + "objective/train/value_avg": -0.01552581787109375, + "objective/train/value_loss": 0.0032388130202889442, + "objective/train/value_max": -0.0011205673217773438, + "objective/train/value_min": -0.297607421875, + "objective/train/value_reward_corr": 0.1381606542172207, + "objective/train/value_std": 0.015838623046875, + "objective/train/weight_avg": 0.9995253086090088, + "objective/train/weighted_lm_loss": 3.2778003215789795, + "objective/train/weights_max": 1.0300813913345337, + "objective/train/weights_min": 0.9786436557769775, + "theoretical_loss": 3.6931252941628547, + "tokens_seen": 883097600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007396468699839487, + "loss": 1.7196, + "theoretical_loss": 3.6927785997534794, + "tokens_seen": 883949568 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.011441457085311413, + "objective/train/docs_used": 507654, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.323378324508667, + "objective/train/original_loss": 3.323378086090088, + "objective/train/theoretical_loss": 3.6924589535592656, + "objective/train/tokens_used": 905196000, + "objective/train/value_avg": -0.01496124267578125, + "objective/train/value_loss": 0.00036536698462441564, + "objective/train/value_max": -0.0010728836059570312, + "objective/train/value_min": -0.314453125, + "objective/train/value_reward_corr": 0.16497726953540987, + "objective/train/value_std": 0.01389312744140625, + "objective/train/weight_avg": 1.0011459589004517, + "objective/train/weighted_lm_loss": 3.3273065090179443, + "objective/train/weights_max": 1.0318375825881958, + "objective/train/weights_min": 0.9886816740036011, + "theoretical_loss": 3.6924589535592656, + "tokens_seen": 884736000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007393258426966293, + "loss": 1.7232, + "theoretical_loss": 3.692352485637474, + "tokens_seen": 884998144 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007390048154093099, + "loss": 1.6902, + "theoretical_loss": 3.6919270172713414, + "tokens_seen": 886046720 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.003024842357262969, + "objective/train/docs_used": 509172, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2125532627105713, + "objective/train/original_loss": 3.212552785873413, + "objective/train/theoretical_loss": 3.691794190560686, + "objective/train/tokens_used": 906834400, + "objective/train/value_avg": -0.02130126953125, + "objective/train/value_loss": 0.0070039741694927216, + "objective/train/value_max": -0.0010776519775390625, + "objective/train/value_min": -0.7666015625, + "objective/train/value_reward_corr": 0.3422172415594218, + "objective/train/value_std": 0.0232086181640625, + "objective/train/weight_avg": 1.0003366470336914, + "objective/train/weighted_lm_loss": 3.2115392684936523, + "objective/train/weights_max": 1.026817798614502, + "objective/train/weights_min": 0.9071464538574219, + "theoretical_loss": 3.691794190560686, + "tokens_seen": 886374400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007386837881219904, + "loss": 1.7144, + "theoretical_loss": 3.6915021929140224, + "tokens_seen": 887095296 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.006491431500762701, + "objective/train/docs_used": 509747, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5276832580566406, + "objective/train/original_loss": 2.5276834964752197, + "objective/train/theoretical_loss": 3.691130998526281, + "objective/train/tokens_used": 908472800, + "objective/train/value_avg": -0.0289459228515625, + "objective/train/value_loss": 0.01007924135774374, + "objective/train/value_max": -0.0009145736694335938, + "objective/train/value_min": -0.91455078125, + "objective/train/value_reward_corr": 0.5238780984793489, + "objective/train/value_std": 0.044952392578125, + "objective/train/weight_avg": 0.9994004964828491, + "objective/train/weighted_lm_loss": 2.5266315937042236, + "objective/train/weights_max": 1.050829529762268, + "objective/train/weights_min": 0.9076675176620483, + "theoretical_loss": 3.691130998526281, + "tokens_seen": 888012800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007383627608346709, + "loss": 1.6931, + "theoretical_loss": 3.691078010831202, + "tokens_seen": 888143872 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007380417335473515, + "loss": 1.6978, + "theoretical_loss": 3.690654469295275, + "tokens_seen": 889192448 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.00305996835231781, + "objective/train/docs_used": 511200, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.3515465259552, + "objective/train/original_loss": 2.351546287536621, + "objective/train/theoretical_loss": 3.690469370855356, + "objective/train/tokens_used": 910111200, + "objective/train/value_avg": -0.03607177734375, + "objective/train/value_loss": 0.010835610330104828, + "objective/train/value_max": -0.0009145736694335938, + "objective/train/value_min": -0.50732421875, + "objective/train/value_reward_corr": 0.5617076146495756, + "objective/train/value_std": 0.06304931640625, + "objective/train/weight_avg": 1.0003591775894165, + "objective/train/weighted_lm_loss": 2.3511040210723877, + "objective/train/weights_max": 1.0348095893859863, + "objective/train/weights_min": 0.9102575182914734, + "theoretical_loss": 3.690469370855356, + "tokens_seen": 889651200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007377207062600321, + "loss": 1.7136, + "theoretical_loss": 3.6902315665853163, + "tokens_seen": 890241024 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.004864597227424383, + "objective/train/docs_used": 511885, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.412478446960449, + "objective/train/original_loss": 3.4124786853790283, + "objective/train/theoretical_loss": 3.689809300987042, + "objective/train/tokens_used": 911749600, + "objective/train/value_avg": -0.02203369140625, + "objective/train/value_loss": 0.006089900154620409, + "objective/train/value_max": -0.0010900497436523438, + "objective/train/value_min": -0.93798828125, + "objective/train/value_reward_corr": 0.5587708993217742, + "objective/train/value_std": 0.033355712890625, + "objective/train/weight_avg": 1.000516414642334, + "objective/train/weighted_lm_loss": 3.4134488105773926, + "objective/train/weights_max": 1.0401052236557007, + "objective/train/weights_min": 0.9096028804779053, + "theoretical_loss": 3.689809300987042, + "tokens_seen": 891289600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007373996789727126, + "loss": 1.6857, + "theoretical_loss": 3.689809300987042, + "tokens_seen": 891289600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007370786516853932, + "loss": 1.7149, + "theoretical_loss": 3.6893876707927777, + "tokens_seen": 892338176 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.010663343593478203, + "objective/train/docs_used": 512575, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3967607021331787, + "objective/train/original_loss": 3.396761178970337, + "objective/train/theoretical_loss": 3.68915078239998, + "objective/train/tokens_used": 913388000, + "objective/train/value_avg": -0.0212860107421875, + "objective/train/value_loss": 0.0032343347556889057, + "objective/train/value_max": -0.0009508132934570312, + "objective/train/value_min": -0.5302734375, + "objective/train/value_reward_corr": 0.3299388428367234, + "objective/train/value_std": 0.03485107421875, + "objective/train/weight_avg": 1.0010823011398315, + "objective/train/weighted_lm_loss": 3.39971661567688, + "objective/train/weights_max": 1.0394830703735352, + "objective/train/weights_min": 0.9076371788978577, + "theoretical_loss": 3.68915078239998, + "tokens_seen": 892928000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007367576243980738, + "loss": 1.6754, + "theoretical_loss": 3.6889666743014295, + "tokens_seen": 893386752 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007364365971107544, + "loss": 1.6781, + "theoretical_loss": 3.6885463098184434, + "tokens_seen": 894435328 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.008073430508375168, + "objective/train/docs_used": 513630, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.396184206008911, + "objective/train/original_loss": 3.396184206008911, + "objective/train/theoretical_loss": 3.688493808612015, + "objective/train/tokens_used": 915026400, + "objective/train/value_avg": -0.01308441162109375, + "objective/train/value_loss": 0.0005178208230063319, + "objective/train/value_max": -0.0011072158813476562, + "objective/train/value_min": -0.486083984375, + "objective/train/value_reward_corr": 0.5454730463993835, + "objective/train/value_std": 0.0186004638671875, + "objective/train/weight_avg": 1.0008100271224976, + "objective/train/weighted_lm_loss": 3.3989737033843994, + "objective/train/weights_max": 1.0317968130111694, + "objective/train/weights_min": 0.9588714241981506, + "theoretical_loss": 3.688493808612015, + "tokens_seen": 894566400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007361155698234349, + "loss": 1.656, + "theoretical_loss": 3.6881265756557795, + "tokens_seen": 895483904 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.009677364490926266, + "objective/train/docs_used": 514378, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3851091861724854, + "objective/train/original_loss": 3.3851096630096436, + "objective/train/theoretical_loss": 3.6878383731798863, + "objective/train/tokens_used": 916664800, + "objective/train/value_avg": -0.0180511474609375, + "objective/train/value_loss": 0.0011711184633895755, + "objective/train/value_max": -0.0009660720825195312, + "objective/train/value_min": -0.481201171875, + "objective/train/value_reward_corr": 0.2670100658088544, + "objective/train/value_std": 0.0225372314453125, + "objective/train/weight_avg": 1.0009735822677612, + "objective/train/weighted_lm_loss": 3.3887457847595215, + "objective/train/weights_max": 1.0408648252487183, + "objective/train/weights_min": 0.9359936714172363, + "theoretical_loss": 3.6878383731798863, + "tokens_seen": 896204800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007357945425361155, + "loss": 1.698, + "theoretical_loss": 3.6877074701318735, + "tokens_seen": 896532480 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007354735152487962, + "loss": 1.6717, + "theoretical_loss": 3.6872889915716107, + "tokens_seen": 897581056 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.005615198984742165, + "objective/train/docs_used": 515835, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4211585521698, + "objective/train/original_loss": 3.421159029006958, + "objective/train/theoretical_loss": 3.6871844696989227, + "objective/train/tokens_used": 918303200, + "objective/train/value_avg": -0.019134521484375, + "objective/train/value_loss": 0.0025866597425192595, + "objective/train/value_max": -0.0011472702026367188, + "objective/train/value_min": -0.7822265625, + "objective/train/value_reward_corr": 0.4153080538223145, + "objective/train/value_std": 0.023681640625, + "objective/train/weight_avg": 1.0005742311477661, + "objective/train/weighted_lm_loss": 3.4234983921051025, + "objective/train/weights_max": 1.0632069110870361, + "objective/train/weights_min": 0.9100597500801086, + "theoretical_loss": 3.6871844696989227, + "tokens_seen": 897843200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007351524879614768, + "loss": 1.6987, + "theoretical_loss": 3.6868711383062873, + "tokens_seen": 898629632 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.02012377604842186, + "objective/train/docs_used": 516375, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.97676944732666, + "objective/train/original_loss": 2.9767696857452393, + "objective/train/theoretical_loss": 3.6865320918027438, + "objective/train/tokens_used": 919941600, + "objective/train/value_avg": -0.01885986328125, + "objective/train/value_loss": 0.013747085817158222, + "objective/train/value_max": -0.0009584426879882812, + "objective/train/value_min": -0.74853515625, + "objective/train/value_reward_corr": 0.1646661343562317, + "objective/train/value_std": 0.0217437744140625, + "objective/train/weight_avg": 0.9980553984642029, + "objective/train/weighted_lm_loss": 2.977247714996338, + "objective/train/weights_max": 1.0337446928024292, + "objective/train/weights_min": 0.932577908039093, + "theoretical_loss": 3.6865320918027438, + "tokens_seen": 899481600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007348314606741573, + "loss": 1.6581, + "theoretical_loss": 3.686453908673583, + "tokens_seen": 899678208 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007345104333868379, + "loss": 1.6711, + "theoretical_loss": 3.6860373010175262, + "tokens_seen": 900726784 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.006894620135426521, + "objective/train/docs_used": 517599, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1583030223846436, + "objective/train/original_loss": 3.1583030223846436, + "objective/train/theoretical_loss": 3.685881233162962, + "objective/train/tokens_used": 921580000, + "objective/train/value_avg": -0.01389312744140625, + "objective/train/value_loss": 0.008474774658679962, + "objective/train/value_max": -0.0005359649658203125, + "objective/train/value_min": -0.386474609375, + "objective/train/value_reward_corr": 0.4882815880279635, + "objective/train/value_std": 0.01763916015625, + "objective/train/weight_avg": 0.9993520379066467, + "objective/train/weighted_lm_loss": 3.154780387878418, + "objective/train/weights_max": 1.031559944152832, + "objective/train/weights_min": 0.9187752604484558, + "theoretical_loss": 3.685881233162962, + "tokens_seen": 901120000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007341894060995185, + "loss": 1.6874, + "theoretical_loss": 3.685621313688465, + "tokens_seen": 901775360 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.008442961610853672, + "objective/train/docs_used": 518319, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.436699628829956, + "objective/train/original_loss": 3.436699867248535, + "objective/train/theoretical_loss": 3.6852318874888836, + "objective/train/tokens_used": 923218400, + "objective/train/value_avg": -0.01410675048828125, + "objective/train/value_loss": 0.0021123578771948814, + "objective/train/value_max": -0.0007181167602539062, + "objective/train/value_min": -0.358154296875, + "objective/train/value_reward_corr": 0.20619148465229722, + "objective/train/value_std": 0.0160064697265625, + "objective/train/weight_avg": 1.0008546113967896, + "objective/train/weighted_lm_loss": 3.439610719680786, + "objective/train/weights_max": 1.025087594985962, + "objective/train/weights_min": 0.9168638586997986, + "theoretical_loss": 3.6852318874888836, + "tokens_seen": 902758400 + }, + { + "epoch": 0.27, + "learning_rate": 0.000733868378812199, + "loss": 1.6809, + "theoretical_loss": 3.6852059450430343, + "tokens_seen": 902823936 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007335473515248796, + "loss": 1.6731, + "theoretical_loss": 3.6847911934441244, + "tokens_seen": 903872512 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0013861020561307669, + "objective/train/docs_used": 519704, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1463303565979004, + "objective/train/original_loss": 3.1463301181793213, + "objective/train/theoretical_loss": 3.6845840485272205, + "objective/train/tokens_used": 924856800, + "objective/train/value_avg": -0.0225677490234375, + "objective/train/value_loss": 0.008597981184720993, + "objective/train/value_max": -0.0008296966552734375, + "objective/train/value_min": -0.69482421875, + "objective/train/value_reward_corr": 0.40919362735555426, + "objective/train/value_std": 0.0305023193359375, + "objective/train/weight_avg": 1.000180721282959, + "objective/train/weighted_lm_loss": 3.147027015686035, + "objective/train/weights_max": 1.0260534286499023, + "objective/train/weights_min": 0.9103637337684631, + "theoretical_loss": 3.6845840485272205, + "tokens_seen": 904396800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007332263242375602, + "loss": 1.6622, + "theoretical_loss": 3.6843770572608507, + "tokens_seen": 904921088 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007329052969502408, + "loss": 1.628, + "theoretical_loss": 3.6839635348685222, + "tokens_seen": 905969664 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.007274040952324867, + "objective/train/docs_used": 520483, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4969489574432373, + "objective/train/original_loss": 3.4969489574432373, + "objective/train/theoretical_loss": 3.6839377100618, + "objective/train/tokens_used": 926495200, + "objective/train/value_avg": -0.020660400390625, + "objective/train/value_loss": 0.004844598472118378, + "objective/train/value_max": -0.0009660720825195312, + "objective/train/value_min": -0.96630859375, + "objective/train/value_reward_corr": 0.3817595333637909, + "objective/train/value_std": 0.036468505859375, + "objective/train/weight_avg": 1.000751256942749, + "objective/train/weighted_lm_loss": 3.4991302490234375, + "objective/train/weights_max": 1.0857833623886108, + "objective/train/weights_min": 0.9080186486244202, + "theoretical_loss": 3.6839377100618, + "tokens_seen": 906035200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007325842696629213, + "loss": 1.6564, + "theoretical_loss": 3.6835506246486105, + "tokens_seen": 907018240 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.004551814869046211, + "objective/train/docs_used": 521740, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8947019577026367, + "objective/train/original_loss": 2.8947019577026367, + "objective/train/theoretical_loss": 3.6832928659132724, + "objective/train/tokens_used": 928133600, + "objective/train/value_avg": -0.0229949951171875, + "objective/train/value_loss": 0.007642678916454315, + "objective/train/value_max": -0.0011034011840820312, + "objective/train/value_min": -0.728515625, + "objective/train/value_reward_corr": 0.3889356267907491, + "objective/train/value_std": 0.033294677734375, + "objective/train/weight_avg": 1.0004926919937134, + "objective/train/weighted_lm_loss": 2.8951616287231445, + "objective/train/weights_max": 1.0439282655715942, + "objective/train/weights_min": 0.9084192514419556, + "theoretical_loss": 3.6832928659132724, + "tokens_seen": 907673600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007322632423756019, + "loss": 1.6235, + "theoretical_loss": 3.6831383249887226, + "tokens_seen": 908066816 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007319422150882825, + "loss": 1.6281, + "theoretical_loss": 3.682726634282564, + "tokens_seen": 909115392 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.005253341514617205, + "objective/train/docs_used": 522301, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.160996198654175, + "objective/train/original_loss": 3.1609959602355957, + "objective/train/theoretical_loss": 3.682649509938836, + "objective/train/tokens_used": 929772000, + "objective/train/value_avg": -0.01488494873046875, + "objective/train/value_loss": 0.0013072665315121412, + "objective/train/value_max": -0.00030303001403808594, + "objective/train/value_min": -0.92724609375, + "objective/train/value_reward_corr": 0.4430253939956133, + "objective/train/value_std": 0.0214996337890625, + "objective/train/weight_avg": 1.000531792640686, + "objective/train/weighted_lm_loss": 3.162882089614868, + "objective/train/weights_max": 1.0286245346069336, + "objective/train/weights_min": 0.9164671897888184, + "theoretical_loss": 3.682649509938836, + "tokens_seen": 909312000 + }, + { + "epoch": 0.28, + "learning_rate": 0.000731621187800963, + "loss": 1.7099, + "theoretical_loss": 3.682315550929917, + "tokens_seen": 910163968 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.005704361479729414, + "objective/train/docs_used": 523753, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9396400451660156, + "objective/train/original_loss": 2.939639091491699, + "objective/train/theoretical_loss": 3.6820076360319485, + "objective/train/tokens_used": 931410400, + "objective/train/value_avg": -0.013824462890625, + "objective/train/value_loss": 0.002631427487358451, + "objective/train/value_max": -0.0009474754333496094, + "objective/train/value_min": -0.84912109375, + "objective/train/value_reward_corr": 0.32607797227855706, + "objective/train/value_std": 0.0167694091796875, + "objective/train/weight_avg": 1.0005834102630615, + "objective/train/weighted_lm_loss": 2.941256523132324, + "objective/train/weights_max": 1.0332762002944946, + "objective/train/weights_min": 0.9087001085281372, + "theoretical_loss": 3.6820076360319485, + "tokens_seen": 910950400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007313001605136437, + "loss": 1.666, + "theoretical_loss": 3.6819050733366017, + "tokens_seen": 911212544 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007309791332263243, + "loss": 1.6555, + "theoretical_loss": 3.6814951999144547, + "tokens_seen": 912261120 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.004183572717010975, + "objective/train/docs_used": 524575, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.526602268218994, + "objective/train/original_loss": 3.526602268218994, + "objective/train/theoretical_loss": 3.681367238122049, + "objective/train/tokens_used": 933048800, + "objective/train/value_avg": -0.0186920166015625, + "objective/train/value_loss": 0.0032516124192625284, + "objective/train/value_max": -0.001056671142578125, + "objective/train/value_min": -0.86572265625, + "objective/train/value_reward_corr": 0.6519363019057237, + "objective/train/value_std": 0.033477783203125, + "objective/train/weight_avg": 1.000434398651123, + "objective/train/weighted_lm_loss": 3.527736186981201, + "objective/train/weights_max": 1.0344209671020508, + "objective/train/weights_min": 0.9087041020393372, + "theoretical_loss": 3.681367238122049, + "tokens_seen": 912588800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007306581059390049, + "loss": 1.6164, + "theoretical_loss": 3.681085929081294, + "tokens_seen": 913309696 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.008230924606323242, + "objective/train/docs_used": 525722, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.266392707824707, + "objective/train/original_loss": 3.2663931846618652, + "objective/train/theoretical_loss": 3.6807283101742865, + "objective/train/tokens_used": 934687200, + "objective/train/value_avg": -0.0184783935546875, + "objective/train/value_loss": 0.0024620380718261003, + "objective/train/value_max": -0.0005793571472167969, + "objective/train/value_min": -0.43994140625, + "objective/train/value_reward_corr": 0.43473922991333985, + "objective/train/value_std": 0.0276031494140625, + "objective/train/weight_avg": 1.0008352994918823, + "objective/train/weighted_lm_loss": 3.2691001892089844, + "objective/train/weights_max": 1.0371458530426025, + "objective/train/weights_min": 0.9184219241142273, + "theoretical_loss": 3.6807283101742865, + "tokens_seen": 914227200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007303370786516854, + "loss": 1.662, + "theoretical_loss": 3.680677259260892, + "tokens_seen": 914358272 + }, + { + "epoch": 0.28, + "learning_rate": 0.000730016051364366, + "loss": 1.6728, + "theoretical_loss": 3.6802691888829453, + "tokens_seen": 915406848 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.005263617727905512, + "objective/train/docs_used": 526330, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1414406299591064, + "objective/train/original_loss": 3.1414411067962646, + "objective/train/theoretical_loss": 3.6800908461892403, + "objective/train/tokens_used": 936325600, + "objective/train/value_avg": -0.0237274169921875, + "objective/train/value_loss": 0.002747091930359602, + "objective/train/value_max": -0.0009436607360839844, + "objective/train/value_min": -0.39404296875, + "objective/train/value_reward_corr": 0.22327404326585953, + "objective/train/value_std": 0.02459716796875, + "objective/train/weight_avg": 1.000540018081665, + "objective/train/weighted_lm_loss": 3.144192695617676, + "objective/train/weights_max": 1.0400091409683228, + "objective/train/weights_min": 0.9478491544723511, + "theoretical_loss": 3.6800908461892403, + "tokens_seen": 915865600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007296950240770466, + "loss": 1.6759, + "theoretical_loss": 3.679861716383046, + "tokens_seen": 916455424 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.007134995423257351, + "objective/train/docs_used": 527026, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1876261234283447, + "objective/train/original_loss": 3.187626600265503, + "objective/train/theoretical_loss": 3.6794548402026535, + "objective/train/tokens_used": 937964000, + "objective/train/value_avg": -0.01200103759765625, + "objective/train/value_loss": 0.0009923952165991068, + "objective/train/value_max": -0.0006933212280273438, + "objective/train/value_min": -0.12066650390625, + "objective/train/value_reward_corr": 0.11990653035030113, + "objective/train/value_std": 0.00972747802734375, + "objective/train/weight_avg": 1.000718355178833, + "objective/train/weighted_lm_loss": 3.1895511150360107, + "objective/train/weights_max": 1.0111180543899536, + "objective/train/weights_min": 0.909112274646759, + "theoretical_loss": 3.6794548402026535, + "tokens_seen": 917504000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007293739967897271, + "loss": 1.6589, + "theoretical_loss": 3.6794548402026535, + "tokens_seen": 917504000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007290529695024077, + "loss": 1.6625, + "theoretical_loss": 3.6790485587890642, + "tokens_seen": 918552576 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0033514807000756264, + "objective/train/docs_used": 528220, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.40006422996521, + "objective/train/original_loss": 3.40006422996521, + "objective/train/theoretical_loss": 3.678820286285162, + "objective/train/tokens_used": 939602400, + "objective/train/value_avg": -0.0129547119140625, + "objective/train/value_loss": 0.002994909416884184, + "objective/train/value_max": -0.0009622573852539062, + "objective/train/value_min": -0.49365234375, + "objective/train/value_reward_corr": 0.12769368457146932, + "objective/train/value_std": 0.01171112060546875, + "objective/train/weight_avg": 1.0003498792648315, + "objective/train/weighted_lm_loss": 3.4010212421417236, + "objective/train/weights_max": 1.0402673482894897, + "objective/train/weights_min": 0.9218307137489319, + "theoretical_loss": 3.678820286285162, + "tokens_seen": 919142400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007287319422150883, + "loss": 1.6708, + "theoretical_loss": 3.6786428705953855, + "tokens_seen": 919601152 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007284109149277689, + "loss": 1.6313, + "theoretical_loss": 3.6782377740805043, + "tokens_seen": 920649728 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.005844503175467253, + "objective/train/docs_used": 528886, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.392643928527832, + "objective/train/original_loss": 3.3926432132720947, + "objective/train/theoretical_loss": 3.678187178542029, + "objective/train/tokens_used": 941240800, + "objective/train/value_avg": -0.0179290771484375, + "objective/train/value_loss": 0.005258332472294569, + "objective/train/value_max": -0.0009002685546875, + "objective/train/value_min": -0.82763671875, + "objective/train/value_reward_corr": 0.4053095797172095, + "objective/train/value_std": 0.03448486328125, + "objective/train/weight_avg": 1.0006102323532104, + "objective/train/weighted_lm_loss": 3.3948280811309814, + "objective/train/weights_max": 1.0588350296020508, + "objective/train/weights_min": 0.9072017073631287, + "theoretical_loss": 3.678187178542029, + "tokens_seen": 920780800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007280898876404494, + "loss": 1.6796, + "theoretical_loss": 3.6778332677090617, + "tokens_seen": 921698304 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0062701888382434845, + "objective/train/docs_used": 530064, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6922504901885986, + "objective/train/original_loss": 3.6922507286071777, + "objective/train/theoretical_loss": 3.6775555111128835, + "objective/train/tokens_used": 942879200, + "objective/train/value_avg": -0.023284912109375, + "objective/train/value_loss": 0.007472959812730551, + "objective/train/value_max": -0.0008625984191894531, + "objective/train/value_min": -0.66748046875, + "objective/train/value_reward_corr": 0.23231740915810115, + "objective/train/value_std": 0.0333251953125, + "objective/train/weight_avg": 1.0006636381149292, + "objective/train/weighted_lm_loss": 3.6951353549957275, + "objective/train/weights_max": 1.0373334884643555, + "objective/train/weights_min": 0.9074615240097046, + "theoretical_loss": 3.6775555111128835, + "tokens_seen": 922419200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00072776886035313, + "loss": 1.6716, + "theoretical_loss": 3.6774293499514243, + "tokens_seen": 922746880 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007274478330658107, + "loss": 1.6267, + "theoretical_loss": 3.6770260192836544, + "tokens_seen": 923795456 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.000824926421046257, + "objective/train/docs_used": 530632, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.728451728820801, + "objective/train/original_loss": 3.7284514904022217, + "objective/train/theoretical_loss": 3.6769252781714576, + "objective/train/tokens_used": 944517600, + "objective/train/value_avg": -0.0281219482421875, + "objective/train/value_loss": 0.013424763455986977, + "objective/train/value_max": -0.0010166168212890625, + "objective/train/value_min": -0.96142578125, + "objective/train/value_reward_corr": 0.5992362743362185, + "objective/train/value_std": 0.0689697265625, + "objective/train/weight_avg": 1.0001484155654907, + "objective/train/weighted_lm_loss": 3.728668212890625, + "objective/train/weights_max": 1.092089295387268, + "objective/train/weights_min": 0.9073042273521423, + "theoretical_loss": 3.6769252781714576, + "tokens_seen": 924057600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007271268057784913, + "loss": 1.6501, + "theoretical_loss": 3.6766232741874845, + "tokens_seen": 924844032 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.006308781914412975, + "objective/train/docs_used": 532017, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1944971084594727, + "objective/train/original_loss": 3.1944966316223145, + "objective/train/theoretical_loss": 3.6762964739253263, + "objective/train/tokens_used": 946156000, + "objective/train/value_avg": -0.0159149169921875, + "objective/train/value_loss": 0.0035839159972965717, + "objective/train/value_max": -0.0009927749633789062, + "objective/train/value_min": -0.87548828125, + "objective/train/value_reward_corr": 0.37265148500114226, + "objective/train/value_std": 0.0257415771484375, + "objective/train/weight_avg": 1.0006483793258667, + "objective/train/weighted_lm_loss": 3.1964337825775146, + "objective/train/weights_max": 1.058646321296692, + "objective/train/weights_min": 0.9065235257148743, + "theoretical_loss": 3.6762964739253263, + "tokens_seen": 925696000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007268057784911718, + "loss": 1.6375, + "theoretical_loss": 3.67622111315029, + "tokens_seen": 925892608 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007264847512038524, + "loss": 1.6517, + "theoretical_loss": 3.6758195346650595, + "tokens_seen": 926941184 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.01099795289337635, + "objective/train/docs_used": 532753, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7550652027130127, + "objective/train/original_loss": 3.755065441131592, + "objective/train/theoretical_loss": 3.6756690926156557, + "objective/train/tokens_used": 947794400, + "objective/train/value_avg": -0.01329803466796875, + "objective/train/value_loss": 0.00032906318665482104, + "objective/train/value_max": -0.0006718635559082031, + "objective/train/value_min": -0.2366943359375, + "objective/train/value_reward_corr": 0.2331778508358661, + "objective/train/value_std": 0.0131988525390625, + "objective/train/weight_avg": 1.0011014938354492, + "objective/train/weighted_lm_loss": 3.7591302394866943, + "objective/train/weights_max": 1.0183143615722656, + "objective/train/weights_min": 0.9803495407104492, + "theoretical_loss": 3.6756690926156557, + "tokens_seen": 927334400 + }, + { + "epoch": 0.28, + "learning_rate": 0.000726163723916533, + "loss": 1.6705, + "theoretical_loss": 3.6754185372303705, + "tokens_seen": 927989760 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0074889506213366985, + "objective/train/docs_used": 534166, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.399055004119873, + "objective/train/original_loss": 3.399055242538452, + "objective/train/theoretical_loss": 3.6750431285169465, + "objective/train/tokens_used": 949432800, + "objective/train/value_avg": -0.0284271240234375, + "objective/train/value_loss": 0.009678613394498825, + "objective/train/value_max": -0.000690460205078125, + "objective/train/value_min": -0.97265625, + "objective/train/value_reward_corr": 0.5519373999748098, + "objective/train/value_std": 0.06768798828125, + "objective/train/weight_avg": 1.0007965564727783, + "objective/train/weighted_lm_loss": 3.401841640472412, + "objective/train/weights_max": 1.0840449333190918, + "objective/train/weights_min": 0.9081435799598694, + "theoretical_loss": 3.6750431285169465, + "tokens_seen": 928972800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007258426966292135, + "loss": 1.6319, + "theoretical_loss": 3.6750181193503604, + "tokens_seen": 929038336 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007255216693418941, + "loss": 1.6337, + "theoretical_loss": 3.6746182795347013, + "tokens_seen": 930086912 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": -0.009882732294499874, + "objective/train/docs_used": 534638, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.977640390396118, + "objective/train/original_loss": 2.977640390396118, + "objective/train/theoretical_loss": 3.674418575936782, + "objective/train/tokens_used": 951071200, + "objective/train/value_avg": -0.0233154296875, + "objective/train/value_loss": 0.014271278865635395, + "objective/train/value_max": -0.0007352828979492188, + "objective/train/value_min": -0.95849609375, + "objective/train/value_reward_corr": 0.42834016168534583, + "objective/train/value_std": 0.04815673828125, + "objective/train/weight_avg": 0.9990816712379456, + "objective/train/weighted_lm_loss": 2.973231315612793, + "objective/train/weights_max": 1.0740610361099243, + "objective/train/weights_min": 0.9082794189453125, + "theoretical_loss": 3.674418575936782, + "tokens_seen": 930611200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007252006420545747, + "loss": 1.6418, + "theoretical_loss": 3.674219016298571, + "tokens_seen": 931135488 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007248796147672553, + "loss": 1.6113, + "theoretical_loss": 3.673820328162628, + "tokens_seen": 932184064 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.004559147637337446, + "objective/train/docs_used": 535899, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4812982082366943, + "objective/train/original_loss": 3.481297731399536, + "objective/train/theoretical_loss": 3.6737954292155814, + "objective/train/tokens_used": 952709600, + "objective/train/value_avg": -0.0272674560546875, + "objective/train/value_loss": 0.010019585490226746, + "objective/train/value_max": -0.00064849853515625, + "objective/train/value_min": -0.93701171875, + "objective/train/value_reward_corr": 0.5019277347624608, + "objective/train/value_std": 0.0552978515625, + "objective/train/weight_avg": 1.0005050897598267, + "objective/train/weighted_lm_loss": 3.482516050338745, + "objective/train/weights_max": 1.070268988609314, + "objective/train/weights_min": 0.9069929718971252, + "theoretical_loss": 3.6737954292155814, + "tokens_seen": 932249600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007245585874799358, + "loss": 1.6439, + "theoretical_loss": 3.673422213652986, + "tokens_seen": 933232640 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0031652289908379316, + "objective/train/docs_used": 536516, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.052739143371582, + "objective/train/original_loss": 3.052739143371582, + "objective/train/theoretical_loss": 3.6731736827263513, + "objective/train/tokens_used": 954348000, + "objective/train/value_avg": -0.015869140625, + "objective/train/value_loss": 0.0035554233472794294, + "objective/train/value_max": -0.0008726119995117188, + "objective/train/value_min": -0.583984375, + "objective/train/value_reward_corr": 0.3123277858541455, + "objective/train/value_std": 0.024200439453125, + "objective/train/weight_avg": 1.0003340244293213, + "objective/train/weighted_lm_loss": 3.053396224975586, + "objective/train/weights_max": 1.0445888042449951, + "objective/train/weights_min": 0.9106366038322449, + "theoretical_loss": 3.6731736827263513, + "tokens_seen": 933888000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007242375601926164, + "loss": 1.6298, + "theoretical_loss": 3.673024671301186, + "tokens_seen": 934281216 + }, + { + "epoch": 0.28, + "learning_rate": 0.000723916532905297, + "loss": 1.6888, + "theoretical_loss": 3.6726276996441705, + "tokens_seen": 935329792 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.017895888537168503, + "objective/train/docs_used": 537462, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4974586963653564, + "objective/train/original_loss": 3.4974586963653564, + "objective/train/theoretical_loss": 3.6725533308744422, + "objective/train/tokens_used": 955986400, + "objective/train/value_avg": -0.03228759765625, + "objective/train/value_loss": 0.005844737868756056, + "objective/train/value_max": -0.0005974769592285156, + "objective/train/value_min": -0.91015625, + "objective/train/value_reward_corr": 0.550604174908925, + "objective/train/value_std": 0.07861328125, + "objective/train/weight_avg": 1.0018190145492554, + "objective/train/weighted_lm_loss": 3.503359794616699, + "objective/train/weights_max": 1.091378092765808, + "objective/train/weights_min": 0.9388439655303955, + "theoretical_loss": 3.6725533308744422, + "tokens_seen": 935526400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007235955056179774, + "loss": 1.6736, + "theoretical_loss": 3.6722312972242594, + "tokens_seen": 936378368 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.009473737329244614, + "objective/train/docs_used": 538123, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.367907762527466, + "objective/train/original_loss": 3.367907762527466, + "objective/train/theoretical_loss": 3.6719343680973067, + "objective/train/tokens_used": 957624800, + "objective/train/value_avg": -0.0203399658203125, + "objective/train/value_loss": 0.0019679120741784573, + "objective/train/value_max": -0.0008106231689453125, + "objective/train/value_min": -0.95361328125, + "objective/train/value_reward_corr": 0.43167199947901536, + "objective/train/value_std": 0.032379150390625, + "objective/train/weight_avg": 1.0009571313858032, + "objective/train/weighted_lm_loss": 3.372159004211426, + "objective/train/weights_max": 1.072823405265808, + "objective/train/weights_min": 0.911491334438324, + "theoretical_loss": 3.6719343680973067, + "tokens_seen": 937164800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007232744783306581, + "loss": 1.6346, + "theoretical_loss": 3.6718354625891205, + "tokens_seen": 937426944 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007229534510433387, + "loss": 1.6671, + "theoretical_loss": 3.6714401942917485, + "tokens_seen": 938475520 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.00424997927621007, + "objective/train/docs_used": 539283, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7973861694335938, + "objective/train/original_loss": 2.797386407852173, + "objective/train/theoretical_loss": 3.6713167888642575, + "objective/train/tokens_used": 959263200, + "objective/train/value_avg": -0.0244598388671875, + "objective/train/value_loss": 0.006116148084402084, + "objective/train/value_max": -0.0011119842529296875, + "objective/train/value_min": -0.66064453125, + "objective/train/value_reward_corr": 0.47056228658584043, + "objective/train/value_std": 0.037567138671875, + "objective/train/weight_avg": 1.0004551410675049, + "objective/train/weighted_lm_loss": 2.79842472076416, + "objective/train/weights_max": 1.0461580753326416, + "objective/train/weights_min": 0.9203583002090454, + "theoretical_loss": 3.6713167888642575, + "tokens_seen": 938803200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007226324237560193, + "loss": 1.6544, + "theoretical_loss": 3.6710454908904366, + "tokens_seen": 939524096 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": -0.00761455949395895, + "objective/train/docs_used": 539932, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.200578212738037, + "objective/train/original_loss": 3.200577974319458, + "objective/train/theoretical_loss": 3.6707005876762313, + "objective/train/tokens_used": 960901600, + "objective/train/value_avg": -0.0235443115234375, + "objective/train/value_loss": 0.01491651963442564, + "objective/train/value_max": -0.0010652542114257812, + "objective/train/value_min": -0.51318359375, + "objective/train/value_reward_corr": 0.3699694966676353, + "objective/train/value_std": 0.0271148681640625, + "objective/train/weight_avg": 0.9993113875389099, + "objective/train/weighted_lm_loss": 3.1971287727355957, + "objective/train/weights_max": 1.0452347993850708, + "objective/train/weights_min": 0.9068694710731506, + "theoretical_loss": 3.6707005876762313, + "tokens_seen": 940441600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007223113964686998, + "loss": 1.6908, + "theoretical_loss": 3.6706513509487513, + "tokens_seen": 940572672 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007219903691813804, + "loss": 1.6458, + "theoretical_loss": 3.6702577730355084, + "tokens_seen": 941621248 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.005874961148947477, + "objective/train/docs_used": 541311, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1676270961761475, + "objective/train/original_loss": 3.1676273345947266, + "objective/train/theoretical_loss": 3.6700857590655547, + "objective/train/tokens_used": 962540000, + "objective/train/value_avg": -0.0200042724609375, + "objective/train/value_loss": 0.004725985694676638, + "objective/train/value_max": -0.0009889602661132812, + "objective/train/value_min": -0.9541015625, + "objective/train/value_reward_corr": 0.4696669306887476, + "objective/train/value_std": 0.035888671875, + "objective/train/weight_avg": 1.000610589981079, + "objective/train/weighted_lm_loss": 3.16927170753479, + "objective/train/weights_max": 1.0687893629074097, + "objective/train/weights_min": 0.9087934494018555, + "theoretical_loss": 3.6700857590655547, + "tokens_seen": 942080000 + }, + { + "epoch": 0.29, + "learning_rate": 0.000721669341894061, + "loss": 1.6383, + "theoretical_loss": 3.6698647557247472, + "tokens_seen": 942669824 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": -0.0028178871143609285, + "objective/train/docs_used": 542104, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.920156717300415, + "objective/train/original_loss": 2.920156955718994, + "objective/train/theoretical_loss": 3.6694722975957066, + "objective/train/tokens_used": 964178400, + "objective/train/value_avg": -0.016876220703125, + "objective/train/value_loss": 0.005192212760448456, + "objective/train/value_max": -0.0008492469787597656, + "objective/train/value_min": -0.296875, + "objective/train/value_reward_corr": 0.305626914973795, + "objective/train/value_std": 0.023681640625, + "objective/train/weight_avg": 0.9997437000274658, + "objective/train/weighted_lm_loss": 2.9184012413024902, + "objective/train/weights_max": 1.021337628364563, + "objective/train/weights_min": 0.9108710885047913, + "theoretical_loss": 3.6694722975957066, + "tokens_seen": 943718400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007213483146067415, + "loss": 1.6422, + "theoretical_loss": 3.6694722975957066, + "tokens_seen": 943718400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007210272873194221, + "loss": 1.645, + "theoretical_loss": 3.6690803972327988, + "tokens_seen": 944766976 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.006074472330510616, + "objective/train/docs_used": 543471, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.9069812297821045, + "objective/train/original_loss": 3.9069812297821045, + "objective/train/theoretical_loss": 3.6688601978610915, + "objective/train/tokens_used": 965816800, + "objective/train/value_avg": -0.0253448486328125, + "objective/train/value_loss": 0.006201003212481737, + "objective/train/value_max": -0.0008797645568847656, + "objective/train/value_min": -0.55712890625, + "objective/train/value_reward_corr": 0.5015527166879874, + "objective/train/value_std": 0.0426025390625, + "objective/train/weight_avg": 1.0006380081176758, + "objective/train/weighted_lm_loss": 3.9098072052001953, + "objective/train/weights_max": 1.0443190336227417, + "objective/train/weights_min": 0.9091728329658508, + "theoretical_loss": 3.6688601978610915, + "tokens_seen": 945356800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007207062600321027, + "loss": 1.646, + "theoretical_loss": 3.6686890532255862, + "tokens_seen": 945815552 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007203852327447833, + "loss": 1.6212, + "theoretical_loss": 3.6682982641687563, + "tokens_seen": 946864128 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.01763715222477913, + "objective/train/docs_used": 544140, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.431184768676758, + "objective/train/original_loss": 3.431184768676758, + "objective/train/theoretical_loss": 3.668249454486809, + "objective/train/tokens_used": 967455200, + "objective/train/value_avg": -0.03411865234375, + "objective/train/value_loss": 0.006673434749245644, + "objective/train/value_max": -0.0007042884826660156, + "objective/train/value_min": -0.74951171875, + "objective/train/value_reward_corr": 0.4070672291488422, + "objective/train/value_std": 0.06097412109375, + "objective/train/weight_avg": 1.001796841621399, + "objective/train/weighted_lm_loss": 3.4385030269622803, + "objective/train/weights_max": 1.0630983114242554, + "objective/train/weights_min": 0.9092164039611816, + "theoretical_loss": 3.668249454486809, + "tokens_seen": 946995200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007200642054574638, + "loss": 1.6333, + "theoretical_loss": 3.6679080286620973, + "tokens_seen": 947912704 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.002389028202742338, + "objective/train/docs_used": 544543, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.236694574356079, + "objective/train/original_loss": 3.2366950511932373, + "objective/train/theoretical_loss": 3.667640062128423, + "objective/train/tokens_used": 969093600, + "objective/train/value_avg": -0.0165252685546875, + "objective/train/value_loss": 0.004429771099239588, + "objective/train/value_max": -0.0010366439819335938, + "objective/train/value_min": -0.671875, + "objective/train/value_reward_corr": 0.3004399704702335, + "objective/train/value_std": 0.0172882080078125, + "objective/train/weight_avg": 1.000260591506958, + "objective/train/weighted_lm_loss": 3.2363121509552, + "objective/train/weights_max": 1.0257302522659302, + "objective/train/weights_min": 0.9081550240516663, + "theoretical_loss": 3.667640062128423, + "tokens_seen": 948633600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007197431781701444, + "loss": 1.6615, + "theoretical_loss": 3.6675183453104747, + "tokens_seen": 948961280 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007194221508828251, + "loss": 1.6682, + "theoretical_loss": 3.6671292127238067, + "tokens_seen": 950009856 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.007163960486650467, + "objective/train/docs_used": 545942, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.293698310852051, + "objective/train/original_loss": 3.2936980724334717, + "objective/train/theoretical_loss": 3.667032015471743, + "objective/train/tokens_used": 970732000, + "objective/train/value_avg": -0.0163116455078125, + "objective/train/value_loss": 0.0031513995490968227, + "objective/train/value_max": -0.0010204315185546875, + "objective/train/value_min": -0.57666015625, + "objective/train/value_reward_corr": 0.2758684510033611, + "objective/train/value_std": 0.0169525146484375, + "objective/train/weight_avg": 1.0007318258285522, + "objective/train/weighted_lm_loss": 3.295941114425659, + "objective/train/weights_max": 1.0451414585113525, + "objective/train/weights_min": 0.9184430241584778, + "theoretical_loss": 3.667032015471743, + "tokens_seen": 950272000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007191011235955056, + "loss": 1.6557, + "theoretical_loss": 3.66674062951704, + "tokens_seen": 951058432 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.004677454009652138, + "objective/train/docs_used": 546639, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0606818199157715, + "objective/train/original_loss": 3.0606818199157715, + "objective/train/theoretical_loss": 3.6664253092325962, + "objective/train/tokens_used": 972370400, + "objective/train/value_avg": -0.01715087890625, + "objective/train/value_loss": 0.003671941114589572, + "objective/train/value_max": -0.0005793571472167969, + "objective/train/value_min": -0.654296875, + "objective/train/value_reward_corr": 0.26442999259557354, + "objective/train/value_std": 0.0198822021484375, + "objective/train/weight_avg": 1.0004857778549194, + "objective/train/weighted_lm_loss": 3.061527967453003, + "objective/train/weights_max": 1.038144826889038, + "objective/train/weights_min": 0.908807098865509, + "theoretical_loss": 3.6664253092325962, + "tokens_seen": 951910400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007187800963081862, + "loss": 1.6696, + "theoretical_loss": 3.666352594310127, + "tokens_seen": 952107008 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007184590690208668, + "loss": 1.6073, + "theoretical_loss": 3.6659651057280023, + "tokens_seen": 953155584 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.005715177394449711, + "objective/train/docs_used": 547886, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.483006477355957, + "objective/train/original_loss": 2.483006238937378, + "objective/train/theoretical_loss": 3.66581993815661, + "objective/train/tokens_used": 974008800, + "objective/train/value_avg": -0.0543212890625, + "objective/train/value_loss": 0.005399605259299278, + "objective/train/value_max": -0.0008726119995117188, + "objective/train/value_min": -0.853515625, + "objective/train/value_reward_corr": 0.8973092240701431, + "objective/train/value_std": 0.14794921875, + "objective/train/weight_avg": 1.000598430633545, + "objective/train/weighted_lm_loss": 2.4843411445617676, + "objective/train/weights_max": 1.0762567520141602, + "objective/train/weights_min": 0.9135990738868713, + "theoretical_loss": 3.66581993815661, + "tokens_seen": 953548800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007181380417335474, + "loss": 1.6134, + "theoretical_loss": 3.665578162400558, + "tokens_seen": 954204160 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.005014305002987385, + "objective/train/docs_used": 548540, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.340383529663086, + "objective/train/original_loss": 3.340383768081665, + "objective/train/theoretical_loss": 3.66521589701899, + "objective/train/tokens_used": 975647200, + "objective/train/value_avg": -0.01309967041015625, + "objective/train/value_loss": 0.0013751566875725985, + "objective/train/value_max": -0.0006933212280273438, + "objective/train/value_min": -0.5927734375, + "objective/train/value_reward_corr": 0.44363586503923735, + "objective/train/value_std": 0.016204833984375, + "objective/train/weight_avg": 1.000508189201355, + "objective/train/weighted_lm_loss": 3.342303514480591, + "objective/train/weights_max": 1.0159785747528076, + "objective/train/weights_min": 0.9065452218055725, + "theoretical_loss": 3.66521589701899, + "tokens_seen": 955187200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007178170144462279, + "loss": 1.6348, + "theoretical_loss": 3.66519176296262, + "tokens_seen": 955252736 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007174959871589085, + "loss": 1.6604, + "theoretical_loss": 3.664805906053928, + "tokens_seen": 956301312 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.003965480253100395, + "objective/train/docs_used": 549880, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3218698501586914, + "objective/train/original_loss": 3.3218698501586914, + "objective/train/theoretical_loss": 3.664613180624306, + "objective/train/tokens_used": 977285600, + "objective/train/value_avg": -0.0302734375, + "objective/train/value_loss": 0.010144227184355259, + "objective/train/value_max": -0.0007853507995605469, + "objective/train/value_min": -0.9814453125, + "objective/train/value_reward_corr": 0.5709226490679884, + "objective/train/value_std": 0.07720947265625, + "objective/train/weight_avg": 1.0004465579986572, + "objective/train/weighted_lm_loss": 3.323756694793701, + "objective/train/weights_max": 1.0752249956130981, + "objective/train/weights_min": 0.9078832864761353, + "theoretical_loss": 3.664613180624306, + "tokens_seen": 956825600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007171749598715891, + "loss": 1.6139, + "theoretical_loss": 3.6644205903191107, + "tokens_seen": 957349888 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007168539325842697, + "loss": 1.6498, + "theoretical_loss": 3.664035814407661, + "tokens_seen": 958398464 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0102612916380167, + "objective/train/docs_used": 550706, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7311582565307617, + "objective/train/original_loss": 3.7311577796936035, + "objective/train/theoretical_loss": 3.6640117838062745, + "objective/train/tokens_used": 978924000, + "objective/train/value_avg": -0.0203857421875, + "objective/train/value_loss": 0.0011450377060100436, + "objective/train/value_max": -0.0008263587951660156, + "objective/train/value_min": -0.363525390625, + "objective/train/value_reward_corr": 0.5168298373394647, + "objective/train/value_std": 0.0323486328125, + "objective/train/weight_avg": 1.0010318756103516, + "objective/train/weighted_lm_loss": 3.7353811264038086, + "objective/train/weights_max": 1.0346570014953613, + "objective/train/weights_min": 0.9431256055831909, + "theoretical_loss": 3.6640117838062745, + "tokens_seen": 958464000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007165329052969502, + "loss": 1.6245, + "theoretical_loss": 3.663651576973915, + "tokens_seen": 959447040 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0017246684292331338, + "objective/train/docs_used": 551132, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1371166706085205, + "objective/train/original_loss": 3.1371164321899414, + "objective/train/theoretical_loss": 3.663411701427548, + "objective/train/tokens_used": 980562400, + "objective/train/value_avg": -0.0230560302734375, + "objective/train/value_loss": 0.008268183097243309, + "objective/train/value_max": -0.0007266998291015625, + "objective/train/value_min": -0.818359375, + "objective/train/value_reward_corr": 0.6407634007253046, + "objective/train/value_std": 0.04718017578125, + "objective/train/weight_avg": 1.0002129077911377, + "objective/train/weighted_lm_loss": 3.1386632919311523, + "objective/train/weights_max": 1.0374276638031006, + "objective/train/weights_min": 0.913992702960968, + "theoretical_loss": 3.663411701427548, + "tokens_seen": 960102400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007162118780096308, + "loss": 1.6336, + "theoretical_loss": 3.66326787667703, + "tokens_seen": 960495616 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007158908507223114, + "loss": 1.6359, + "theoretical_loss": 3.6628847121809613, + "tokens_seen": 961544192 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0065648495219647884, + "objective/train/docs_used": 552317, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.537834882736206, + "objective/train/original_loss": 3.537834882736206, + "objective/train/theoretical_loss": 3.662812928379502, + "objective/train/tokens_used": 982200800, + "objective/train/value_avg": -0.0226287841796875, + "objective/train/value_loss": 0.002850189106538892, + "objective/train/value_max": -0.0009851455688476562, + "objective/train/value_min": -0.72216796875, + "objective/train/value_reward_corr": 0.38560421111900806, + "objective/train/value_std": 0.036468505859375, + "objective/train/weight_avg": 1.0006705522537231, + "objective/train/weighted_lm_loss": 3.5411980152130127, + "objective/train/weights_max": 1.0587676763534546, + "objective/train/weights_min": 0.9252673983573914, + "theoretical_loss": 3.662812928379502, + "tokens_seen": 961740800 + }, + { + "epoch": 0.29, + "learning_rate": 0.000715569823434992, + "loss": 1.6608, + "theoretical_loss": 3.662502082154439, + "tokens_seen": 962592768 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.005744043737649918, + "objective/train/docs_used": 552873, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.49574875831604, + "objective/train/original_loss": 3.495748519897461, + "objective/train/theoretical_loss": 3.662215459582027, + "objective/train/tokens_used": 983839200, + "objective/train/value_avg": -0.01131439208984375, + "objective/train/value_loss": 0.002662302926182747, + "objective/train/value_max": -0.0008296966552734375, + "objective/train/value_min": -0.94677734375, + "objective/train/value_reward_corr": 0.3392842414769666, + "objective/train/value_std": 0.0196685791015625, + "objective/train/weight_avg": 1.0005873441696167, + "objective/train/weighted_lm_loss": 3.497471332550049, + "objective/train/weights_max": 1.046368956565857, + "objective/train/weights_min": 0.9075832366943359, + "theoretical_loss": 3.662215459582027, + "tokens_seen": 963379200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007152487961476726, + "loss": 1.6693, + "theoretical_loss": 3.662119985270947, + "tokens_seen": 963641344 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007149277688603532, + "loss": 1.6102, + "theoretical_loss": 3.6617384202087004, + "tokens_seen": 964689920 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.012177059426903725, + "objective/train/docs_used": 554313, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.669315814971924, + "objective/train/original_loss": 3.669315814971924, + "objective/train/theoretical_loss": 3.66161928998332, + "objective/train/tokens_used": 985477600, + "objective/train/value_avg": -0.02899169921875, + "objective/train/value_loss": 0.006317170336842537, + "objective/train/value_max": -0.0010166168212890625, + "objective/train/value_min": -0.95361328125, + "objective/train/value_reward_corr": 0.4910235692931521, + "objective/train/value_std": 0.044342041015625, + "objective/train/weight_avg": 1.0012487173080444, + "objective/train/weighted_lm_loss": 3.6741936206817627, + "objective/train/weights_max": 1.0594065189361572, + "objective/train/weights_min": 0.9067437648773193, + "theoretical_loss": 3.66161928998332, + "tokens_seen": 965017600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007146067415730338, + "loss": 1.6089, + "theoretical_loss": 3.6613573856506236, + "tokens_seen": 965738496 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.012211798690259457, + "objective/train/docs_used": 554946, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.953223943710327, + "objective/train/original_loss": 2.953223705291748, + "objective/train/theoretical_loss": 3.661024414559681, + "objective/train/tokens_used": 987116000, + "objective/train/value_avg": -0.0311126708984375, + "objective/train/value_loss": 0.007003395352512598, + "objective/train/value_max": -0.0009813308715820312, + "objective/train/value_min": -0.85693359375, + "objective/train/value_reward_corr": 0.560620921171309, + "objective/train/value_std": 0.06689453125, + "objective/train/weight_avg": 1.001255750656128, + "objective/train/weighted_lm_loss": 2.9572110176086426, + "objective/train/weights_max": 1.0654137134552002, + "objective/train/weights_min": 0.9151504635810852, + "theoretical_loss": 3.661024414559681, + "tokens_seen": 966656000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007142857142857143, + "loss": 1.586, + "theoretical_loss": 3.6609768802843274, + "tokens_seen": 966787072 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007139646869983949, + "loss": 1.6147, + "theoretical_loss": 3.660596902802089, + "tokens_seen": 967835648 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.00753059471026063, + "objective/train/docs_used": 556500, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.595669746398926, + "objective/train/original_loss": 3.5956695079803467, + "objective/train/theoretical_loss": 3.6604308283153073, + "objective/train/tokens_used": 988754400, + "objective/train/value_avg": -0.0159759521484375, + "objective/train/value_loss": 0.001696893246844411, + "objective/train/value_max": -0.0008831024169921875, + "objective/train/value_min": -0.413330078125, + "objective/train/value_reward_corr": 0.10455683174269753, + "objective/train/value_std": 0.0164642333984375, + "objective/train/weight_avg": 1.0007615089416504, + "objective/train/weighted_lm_loss": 3.599228858947754, + "objective/train/weights_max": 1.0361781120300293, + "objective/train/weights_min": 0.9201357960700989, + "theoretical_loss": 3.6604308283153073, + "tokens_seen": 968294400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007136436597110755, + "loss": 1.6212, + "theoretical_loss": 3.66021745190083, + "tokens_seen": 968884224 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.009306278079748154, + "objective/train/docs_used": 557201, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.232182502746582, + "objective/train/original_loss": 3.232182502746582, + "objective/train/theoretical_loss": 3.6598385262820923, + "objective/train/tokens_used": 990392800, + "objective/train/value_avg": -0.0147552490234375, + "objective/train/value_loss": 0.0010293611558154225, + "objective/train/value_max": -0.00046563148498535156, + "objective/train/value_min": -0.276611328125, + "objective/train/value_reward_corr": 0.37774636459008826, + "objective/train/value_std": 0.0178070068359375, + "objective/train/weight_avg": 1.000935673713684, + "objective/train/weighted_lm_loss": 3.2353475093841553, + "objective/train/weights_max": 1.0167946815490723, + "objective/train/weights_min": 0.9415488839149475, + "theoretical_loss": 3.6598385262820923, + "tokens_seen": 969932800 + }, + { + "epoch": 0.29, + "learning_rate": 0.000713322632423756, + "loss": 1.609, + "theoretical_loss": 3.6598385262820923, + "tokens_seen": 969932800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007130016051364366, + "loss": 1.6244, + "theoretical_loss": 3.659460124652022, + "tokens_seen": 970981376 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.000126383631140925, + "objective/train/docs_used": 558372, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0883970260620117, + "objective/train/original_loss": 3.088397264480591, + "objective/train/theoretical_loss": 3.6592475035194267, + "objective/train/tokens_used": 992031200, + "objective/train/value_avg": -0.018218994140625, + "objective/train/value_loss": 0.005162614397704601, + "objective/train/value_max": -0.0011072158813476562, + "objective/train/value_min": -0.375732421875, + "objective/train/value_reward_corr": 0.30167130709869217, + "objective/train/value_std": 0.020538330078125, + "objective/train/weight_avg": 1.0000380277633667, + "objective/train/weighted_lm_loss": 3.0892438888549805, + "objective/train/weights_max": 1.0361661911010742, + "objective/train/weights_min": 0.9183050990104675, + "theoretical_loss": 3.6592475035194267, + "tokens_seen": 971571200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007126805778491172, + "loss": 1.6174, + "theoretical_loss": 3.6590822457213426, + "tokens_seen": 972029952 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007123595505617978, + "loss": 1.6259, + "theoretical_loss": 3.658704888205337, + "tokens_seen": 973078528 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0025406484492123127, + "objective/train/docs_used": 558840, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6066176891326904, + "objective/train/original_loss": 3.6066174507141113, + "objective/train/theoretical_loss": 3.6586577551139974, + "objective/train/tokens_used": 993669600, + "objective/train/value_avg": -0.0136260986328125, + "objective/train/value_loss": 0.005149135831743479, + "objective/train/value_max": -0.0005660057067871094, + "objective/train/value_min": -0.7041015625, + "objective/train/value_reward_corr": 0.3121502659211949, + "objective/train/value_std": 0.0177764892578125, + "objective/train/weight_avg": 1.000279188156128, + "objective/train/weighted_lm_loss": 3.6070542335510254, + "objective/train/weights_max": 1.047648310661316, + "objective/train/weights_min": 0.9058650135993958, + "theoretical_loss": 3.6586577551139974, + "tokens_seen": 973209600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007120385232744783, + "loss": 1.6213, + "theoretical_loss": 3.658328050823826, + "tokens_seen": 974127104 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": -0.0028041524346917868, + "objective/train/docs_used": 560118, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4780991077423096, + "objective/train/original_loss": 3.4780993461608887, + "objective/train/theoretical_loss": 3.658069276179595, + "objective/train/tokens_used": 995308000, + "objective/train/value_avg": -0.0256500244140625, + "objective/train/value_loss": 0.012923583388328552, + "objective/train/value_max": -0.0009112358093261719, + "objective/train/value_min": -0.92724609375, + "objective/train/value_reward_corr": 0.585730033281146, + "objective/train/value_std": 0.06304931640625, + "objective/train/weight_avg": 0.9997828602790833, + "objective/train/weighted_lm_loss": 3.47676157951355, + "objective/train/weights_max": 1.0514100790023804, + "objective/train/weights_min": 0.9091500639915466, + "theoretical_loss": 3.658069276179595, + "tokens_seen": 974848000 + }, + { + "epoch": 0.3, + "learning_rate": 0.000711717495987159, + "loss": 1.6431, + "theoretical_loss": 3.657951732301148, + "tokens_seen": 975175680 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007113964686998396, + "loss": 1.6019, + "theoretical_loss": 3.657575931366135, + "tokens_seen": 976224256 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0064770798198878765, + "objective/train/docs_used": 560789, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.575777053833008, + "objective/train/original_loss": 3.5757765769958496, + "objective/train/theoretical_loss": 3.657482061856916, + "objective/train/tokens_used": 996946400, + "objective/train/value_avg": -0.014739990234375, + "objective/train/value_loss": 0.0025014150887727737, + "objective/train/value_max": -0.0008072853088378906, + "objective/train/value_min": -0.316162109375, + "objective/train/value_reward_corr": 0.17954567244364558, + "objective/train/value_std": 0.01528167724609375, + "objective/train/weight_avg": 1.0006599426269531, + "objective/train/weighted_lm_loss": 3.577934980392456, + "objective/train/weights_max": 1.0245338678359985, + "objective/train/weights_min": 0.9212753772735596, + "theoretical_loss": 3.657482061856916, + "tokens_seen": 976486400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007110754414125201, + "loss": 1.6413, + "theoretical_loss": 3.6572006467520968, + "tokens_seen": 977272832 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": -0.005192920565605164, + "objective/train/docs_used": 561934, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6313371658325195, + "objective/train/original_loss": 3.6313371658325195, + "objective/train/theoretical_loss": 3.6568961073133703, + "objective/train/tokens_used": 998584800, + "objective/train/value_avg": -0.0256805419921875, + "objective/train/value_loss": 0.01174029242247343, + "objective/train/value_max": -0.0006117820739746094, + "objective/train/value_min": -0.95654296875, + "objective/train/value_reward_corr": 0.3931819718862184, + "objective/train/value_std": 0.03961181640625, + "objective/train/weight_avg": 0.9995383024215698, + "objective/train/weighted_lm_loss": 3.6299545764923096, + "objective/train/weights_max": 1.0408581495285034, + "objective/train/weights_min": 0.9067038297653198, + "theoretical_loss": 3.6568961073133703, + "tokens_seen": 978124800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007107544141252007, + "loss": 1.6363, + "theoretical_loss": 3.6568258771967965, + "tokens_seen": 978321408 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007104333868378813, + "loss": 1.6569, + "theoretical_loss": 3.6564516214424323, + "tokens_seen": 979369984 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.011545266956090927, + "objective/train/docs_used": 562605, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.03121018409729, + "objective/train/original_loss": 3.031209945678711, + "objective/train/theoretical_loss": 3.656311407742891, + "objective/train/tokens_used": 1000223200, + "objective/train/value_avg": -0.018951416015625, + "objective/train/value_loss": 0.0018888791091740131, + "objective/train/value_max": -0.0007853507995605469, + "objective/train/value_min": -0.410400390625, + "objective/train/value_reward_corr": 0.298577662304884, + "objective/train/value_std": 0.02252197265625, + "objective/train/weight_avg": 1.0011639595031738, + "objective/train/weighted_lm_loss": 3.0350379943847656, + "objective/train/weights_max": 1.0321719646453857, + "objective/train/weights_min": 0.9251986742019653, + "theoretical_loss": 3.656311407742891, + "tokens_seen": 979763200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007101123595505619, + "loss": 1.6133, + "theoretical_loss": 3.656077878235617, + "tokens_seen": 980418560 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.004226348362863064, + "objective/train/docs_used": 563033, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1481754779815674, + "objective/train/original_loss": 3.1481752395629883, + "objective/train/theoretical_loss": 3.6557279583657434, + "objective/train/tokens_used": 1001861600, + "objective/train/value_avg": -0.01763916015625, + "objective/train/value_loss": 0.002503870753571391, + "objective/train/value_max": -0.0010166168212890625, + "objective/train/value_min": -0.8984375, + "objective/train/value_reward_corr": 0.49256051564446773, + "objective/train/value_std": 0.030975341796875, + "objective/train/weight_avg": 1.0004349946975708, + "objective/train/weighted_lm_loss": 3.149953842163086, + "objective/train/weights_max": 1.0382535457611084, + "objective/train/weights_min": 0.9134848117828369, + "theoretical_loss": 3.6557279583657434, + "tokens_seen": 981401600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007097913322632424, + "loss": 1.628, + "theoretical_loss": 3.6557046463273557, + "tokens_seen": 981467136 + }, + { + "epoch": 0.3, + "learning_rate": 0.000709470304975923, + "loss": 1.6379, + "theoretical_loss": 3.655331924473029, + "tokens_seen": 982515712 + }, + { + "debugging/Self-BLEU-5": 0.4599865317671292, + "debugging/distinct-1-grams": 0.7753828778379399, + "debugging/distinct-2-grams": 0.9579668873607778, + "debugging/entropy-1-grams": 5.851307016100259, + "debugging/entropy-2-grams": 6.818568697998407, + "debugging/length": 480.5, + "debugging/num_segments": 14, + "debugging/raw_token_scores_avg": 0.0192079059779644, + "debugging/raw_token_scores_std": 0.07754319161176682, + "epoch": 0.3, + "objective/train/advantage_avg": 0.007798772770911455, + "objective/train/docs_used": 564216, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4774277210235596, + "objective/train/original_loss": 3.4774274826049805, + "objective/train/theoretical_loss": 3.6551457544283386, + "objective/train/tokens_used": 1003500000, + "objective/train/value_avg": -0.0269622802734375, + "objective/train/value_loss": 0.00475012231618166, + "objective/train/value_max": -0.0005192756652832031, + "objective/train/value_min": -0.88037109375, + "objective/train/value_reward_corr": 0.5162917915798023, + "objective/train/value_std": 0.05706787109375, + "objective/train/weight_avg": 1.0008033514022827, + "objective/train/weighted_lm_loss": 3.479729652404785, + "objective/train/weights_max": 1.0456938743591309, + "objective/train/weights_min": 0.9076843857765198, + "theoretical_loss": 3.6551457544283386, + "tokens_seen": 983040000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007091492776886036, + "loss": 1.6527, + "theoretical_loss": 3.6549597114323706, + "tokens_seen": 983564288 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007088282504012841, + "loss": 1.6429, + "theoretical_loss": 3.6545880059694484, + "tokens_seen": 984612864 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.007448412477970123, + "objective/train/docs_used": 564602, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.069077253341675, + "objective/train/original_loss": 3.069077968597412, + "objective/train/theoretical_loss": 3.654564791203045, + "objective/train/tokens_used": 1005138400, + "objective/train/value_avg": -0.025482177734375, + "objective/train/value_loss": 0.005986152216792107, + "objective/train/value_max": -0.0011034011840820312, + "objective/train/value_min": -0.9326171875, + "objective/train/value_reward_corr": 0.524479178868289, + "objective/train/value_std": 0.0361328125, + "objective/train/weight_avg": 1.0007743835449219, + "objective/train/weighted_lm_loss": 3.0705435276031494, + "objective/train/weights_max": 1.0494740009307861, + "objective/train/weights_min": 0.9077429175376892, + "theoretical_loss": 3.654564791203045, + "tokens_seen": 984678400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007085072231139646, + "loss": 1.6558, + "theoretical_loss": 3.6542168068526433, + "tokens_seen": 985661440 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0013959239004179835, + "objective/train/docs_used": 565772, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3566415309906006, + "objective/train/original_loss": 3.3566415309906006, + "objective/train/theoretical_loss": 3.6539850639880065, + "objective/train/tokens_used": 1006776800, + "objective/train/value_avg": -0.013092041015625, + "objective/train/value_loss": 0.0026237291749566793, + "objective/train/value_max": -0.0005054473876953125, + "objective/train/value_min": -0.37841796875, + "objective/train/value_reward_corr": 0.3451772555762362, + "objective/train/value_std": 0.0197906494140625, + "objective/train/weight_avg": 1.0001524686813354, + "objective/train/weighted_lm_loss": 3.3571999073028564, + "objective/train/weights_max": 1.03083074092865, + "objective/train/weights_min": 0.9185165762901306, + "theoretical_loss": 3.6539850639880065, + "tokens_seen": 986316800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007081861958266452, + "loss": 1.6684, + "theoretical_loss": 3.653846112854634, + "tokens_seen": 986710016 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007078651685393258, + "loss": 1.6851, + "theoretical_loss": 3.6534759227523708, + "tokens_seen": 987758592 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": -0.0008539853733964264, + "objective/train/docs_used": 566398, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.388343572616577, + "objective/train/original_loss": 3.3883440494537354, + "objective/train/theoretical_loss": 3.6534065681069574, + "objective/train/tokens_used": 1008415200, + "objective/train/value_avg": -0.016998291015625, + "objective/train/value_loss": 0.007534677628427744, + "objective/train/value_max": -0.0007123947143554688, + "objective/train/value_min": -0.87548828125, + "objective/train/value_reward_corr": 0.40438342254518317, + "objective/train/value_std": 0.02978515625, + "objective/train/weight_avg": 0.9999513626098633, + "objective/train/weighted_lm_loss": 3.387758731842041, + "objective/train/weights_max": 1.0369762182235718, + "objective/train/weights_min": 0.9065502285957336, + "theoretical_loss": 3.6534065681069574, + "tokens_seen": 987955200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007075441412520063, + "loss": 1.6406, + "theoretical_loss": 3.653106235327061, + "tokens_seen": 988807168 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.008666039444506168, + "objective/train/docs_used": 567574, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.03642201423645, + "objective/train/original_loss": 3.0364222526550293, + "objective/train/theoretical_loss": 3.65282929890904, + "objective/train/tokens_used": 1010053600, + "objective/train/value_avg": -0.0127410888671875, + "objective/train/value_loss": 0.00032173070940189064, + "objective/train/value_max": -0.0008692741394042969, + "objective/train/value_min": -0.2269287109375, + "objective/train/value_reward_corr": 0.2842666067542761, + "objective/train/value_std": 0.01336669921875, + "objective/train/weight_avg": 1.0008682012557983, + "objective/train/weighted_lm_loss": 3.0388591289520264, + "objective/train/weights_max": 1.022804856300354, + "objective/train/weights_min": 0.9773038029670715, + "theoretical_loss": 3.65282929890904, + "tokens_seen": 989593600 + }, + { + "epoch": 0.3, + "learning_rate": 0.000707223113964687, + "loss": 1.6288, + "theoretical_loss": 3.6527370493641493, + "tokens_seen": 989855744 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007069020866773676, + "loss": 1.6737, + "theoretical_loss": 3.652368363653297, + "tokens_seen": 990904320 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.00503726489841938, + "objective/train/docs_used": 568180, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.3693268299102783, + "objective/train/original_loss": 2.3693275451660156, + "objective/train/theoretical_loss": 3.652253251768628, + "objective/train/tokens_used": 1011692000, + "objective/train/value_avg": -0.0185394287109375, + "objective/train/value_loss": 0.0014274590648710728, + "objective/train/value_max": -0.0011472702026367188, + "objective/train/value_min": -0.349609375, + "objective/train/value_reward_corr": 0.33913561687792815, + "objective/train/value_std": 0.019073486328125, + "objective/train/weight_avg": 1.000510811805725, + "objective/train/weighted_lm_loss": 2.3710570335388184, + "objective/train/weights_max": 1.0354336500167847, + "objective/train/weights_min": 0.9693698287010193, + "theoretical_loss": 3.652253251768628, + "tokens_seen": 991232000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007065810593900482, + "loss": 1.6252, + "theoretical_loss": 3.6520001769883628, + "tokens_seen": 991952896 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.008107101544737816, + "objective/train/docs_used": 569645, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.179076910018921, + "objective/train/original_loss": 3.179077386856079, + "objective/train/theoretical_loss": 3.651678422085146, + "objective/train/tokens_used": 1013330400, + "objective/train/value_avg": -0.0201416015625, + "objective/train/value_loss": 0.0037464320193976164, + "objective/train/value_max": -0.0008692741394042969, + "objective/train/value_min": -0.86865234375, + "objective/train/value_reward_corr": 0.29543796591511895, + "objective/train/value_std": 0.0267791748046875, + "objective/train/weight_avg": 1.0008291006088257, + "objective/train/weighted_lm_loss": 3.181429624557495, + "objective/train/weights_max": 1.0444438457489014, + "objective/train/weights_min": 0.9064845442771912, + "theoretical_loss": 3.651678422085146, + "tokens_seen": 992870400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007062600321027287, + "loss": 1.6567, + "theoretical_loss": 3.651632488167385, + "tokens_seen": 993001472 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007059390048154093, + "loss": 1.668, + "theoretical_loss": 3.651265295992563, + "tokens_seen": 994050048 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.011251918971538544, + "objective/train/docs_used": 570064, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.417724370956421, + "objective/train/original_loss": 3.417724609375, + "objective/train/theoretical_loss": 3.651104805282893, + "objective/train/tokens_used": 1014968800, + "objective/train/value_avg": -0.01507568359375, + "objective/train/value_loss": 0.0009799025719985366, + "objective/train/value_max": -0.0006513595581054688, + "objective/train/value_min": -0.36865234375, + "objective/train/value_reward_corr": 0.09888594806263255, + "objective/train/value_std": 0.02215576171875, + "objective/train/weight_avg": 1.0011301040649414, + "objective/train/weighted_lm_loss": 3.4211843013763428, + "objective/train/weights_max": 1.037306308746338, + "objective/train/weights_min": 0.9508612155914307, + "theoretical_loss": 3.651104805282893, + "tokens_seen": 994508800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007056179775280899, + "loss": 1.6797, + "theoretical_loss": 3.650898599270236, + "tokens_seen": 995098624 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.001281527103856206, + "objective/train/docs_used": 571551, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3422460556030273, + "objective/train/original_loss": 3.3422462940216064, + "objective/train/theoretical_loss": 3.6505323968108674, + "objective/train/tokens_used": 1016607200, + "objective/train/value_avg": -0.020965576171875, + "objective/train/value_loss": 0.0040998151525855064, + "objective/train/value_max": -0.0008759498596191406, + "objective/train/value_min": -0.67578125, + "objective/train/value_reward_corr": 0.4262505296502993, + "objective/train/value_std": 0.032989501953125, + "objective/train/weight_avg": 1.0001484155654907, + "objective/train/weighted_lm_loss": 3.3445942401885986, + "objective/train/weights_max": 1.0298619270324707, + "objective/train/weights_min": 0.9133073091506958, + "theoretical_loss": 3.6505323968108674, + "tokens_seen": 996147200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007052969502407704, + "loss": 1.6486, + "theoretical_loss": 3.6505323968108674, + "tokens_seen": 996147200 + }, + { + "epoch": 0.3, + "learning_rate": 0.000704975922953451, + "loss": 1.6398, + "theoretical_loss": 3.6501666874290244, + "tokens_seen": 997195776 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.006799507886171341, + "objective/train/docs_used": 572250, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.23360013961792, + "objective/train/original_loss": 3.23360013961792, + "objective/train/theoretical_loss": 3.6499611921425927, + "objective/train/tokens_used": 1018245600, + "objective/train/value_avg": -0.0208587646484375, + "objective/train/value_loss": 0.002416679635643959, + "objective/train/value_max": -0.0009698867797851562, + "objective/train/value_min": -0.4541015625, + "objective/train/value_reward_corr": 0.5229820838900392, + "objective/train/value_std": 0.031646728515625, + "objective/train/weight_avg": 1.0006918907165527, + "objective/train/weighted_lm_loss": 3.235732078552246, + "objective/train/weights_max": 1.0273393392562866, + "objective/train/weights_min": 0.9371799230575562, + "theoretical_loss": 3.6499611921425927, + "tokens_seen": 997785600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007046548956661316, + "loss": 1.6103, + "theoretical_loss": 3.6498014699433603, + "tokens_seen": 998244352 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007043338683788122, + "loss": 1.6235, + "theoretical_loss": 3.6494367431765955, + "tokens_seen": 999292928 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.006882085930556059, + "objective/train/docs_used": 573370, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.545626640319824, + "objective/train/original_loss": 3.545626401901245, + "objective/train/theoretical_loss": 3.6493911867759463, + "objective/train/tokens_used": 1019884000, + "objective/train/value_avg": -0.01517486572265625, + "objective/train/value_loss": 0.0024894792586565018, + "objective/train/value_max": -0.0009889602661132812, + "objective/train/value_min": -0.89892578125, + "objective/train/value_reward_corr": 0.44928823720978783, + "objective/train/value_std": 0.019256591796875, + "objective/train/weight_avg": 1.0007004737854004, + "objective/train/weighted_lm_loss": 3.5477442741394043, + "objective/train/weights_max": 1.02463960647583, + "objective/train/weights_min": 0.9103858470916748, + "theoretical_loss": 3.6493911867759463, + "tokens_seen": 999424000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007040128410914927, + "loss": 1.6287, + "theoretical_loss": 3.6490725059554996, + "tokens_seen": 1000341504 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.003084370633587241, + "objective/train/docs_used": 574030, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3343801498413086, + "objective/train/original_loss": 3.3343801498413086, + "objective/train/theoretical_loss": 3.6488223762329888, + "objective/train/tokens_used": 1021522400, + "objective/train/value_avg": -0.0106964111328125, + "objective/train/value_loss": 0.0005977174732834101, + "objective/train/value_max": -0.0006985664367675781, + "objective/train/value_min": -0.12139892578125, + "objective/train/value_reward_corr": 0.3295489332432879, + "objective/train/value_std": 0.01062774658203125, + "objective/train/weight_avg": 1.0003114938735962, + "objective/train/weighted_lm_loss": 3.335556983947754, + "objective/train/weights_max": 1.0095701217651367, + "objective/train/weights_min": 0.9721769094467163, + "theoretical_loss": 3.6488223762329888, + "tokens_seen": 1001062400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007036918138041734, + "loss": 1.6044, + "theoretical_loss": 3.648708757110873, + "tokens_seen": 1001390080 + }, + { + "epoch": 0.3, + "learning_rate": 0.000703370786516854, + "loss": 1.6357, + "theoretical_loss": 3.6483454954775305, + "tokens_seen": 1002438656 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0016159217339009047, + "objective/train/docs_used": 574442, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.236109733581543, + "objective/train/original_loss": 3.236109972000122, + "objective/train/theoretical_loss": 3.648254756059793, + "objective/train/tokens_used": 1023160800, + "objective/train/value_avg": -0.0201873779296875, + "objective/train/value_loss": 0.0026790916454046965, + "objective/train/value_max": -0.000583648681640625, + "objective/train/value_min": -0.48046875, + "objective/train/value_reward_corr": 0.7454680102111926, + "objective/train/value_std": 0.0419921875, + "objective/train/weight_avg": 1.0001747608184814, + "objective/train/weighted_lm_loss": 3.2365283966064453, + "objective/train/weights_max": 1.0295727252960205, + "objective/train/weights_min": 0.9321368932723999, + "theoretical_loss": 3.648254756059793, + "tokens_seen": 1002700800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007030497592295345, + "loss": 1.6385, + "theoretical_loss": 3.64798271989428, + "tokens_seen": 1003487232 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.006675776094198227, + "objective/train/docs_used": 575683, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.399979591369629, + "objective/train/original_loss": 3.3999791145324707, + "objective/train/theoretical_loss": 3.64768832182628, + "objective/train/tokens_used": 1024799200, + "objective/train/value_avg": -0.019195556640625, + "objective/train/value_loss": 0.002105290535837412, + "objective/train/value_max": -0.0008296966552734375, + "objective/train/value_min": -0.4873046875, + "objective/train/value_reward_corr": 0.46516795589907045, + "objective/train/value_std": 0.027587890625, + "objective/train/weight_avg": 1.0006780624389648, + "objective/train/weighted_lm_loss": 3.4025814533233643, + "objective/train/weights_max": 1.0448070764541626, + "objective/train/weights_min": 0.9363419413566589, + "theoretical_loss": 3.64768832182628, + "tokens_seen": 1004339200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007027287319422151, + "loss": 1.6503, + "theoretical_loss": 3.647620429203908, + "tokens_seen": 1004535808 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007024077046548957, + "loss": 1.6252, + "theoretical_loss": 3.6472586222531587, + "tokens_seen": 1005584384 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.004819732159376144, + "objective/train/docs_used": 576356, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6087849140167236, + "objective/train/original_loss": 3.6087849140167236, + "objective/train/theoretical_loss": 3.6471230691260477, + "objective/train/tokens_used": 1026437600, + "objective/train/value_avg": -0.02197265625, + "objective/train/value_loss": 0.0059167384169995785, + "objective/train/value_max": -0.0005092620849609375, + "objective/train/value_min": -0.97607421875, + "objective/train/value_reward_corr": 0.6075905436723154, + "objective/train/value_std": 0.048828125, + "objective/train/weight_avg": 1.0005109310150146, + "objective/train/weighted_lm_loss": 3.61043381690979, + "objective/train/weights_max": 1.0545731782913208, + "objective/train/weights_min": 0.9075435400009155, + "theoretical_loss": 3.6471230691260477, + "tokens_seen": 1005977600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007020866773675763, + "loss": 1.6199, + "theoretical_loss": 3.6468972978927208, + "tokens_seen": 1006632960 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.008817935362458229, + "objective/train/docs_used": 577861, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0984532833099365, + "objective/train/original_loss": 3.0984528064727783, + "objective/train/theoretical_loss": 3.6465589935762104, + "objective/train/tokens_used": 1028076000, + "objective/train/value_avg": -0.025482177734375, + "objective/train/value_loss": 0.00192506134044379, + "objective/train/value_max": -0.0009508132934570312, + "objective/train/value_min": -0.293212890625, + "objective/train/value_reward_corr": 0.5270192345553183, + "objective/train/value_std": 0.0355224609375, + "objective/train/weight_avg": 1.0008913278579712, + "objective/train/weighted_lm_loss": 3.101606845855713, + "objective/train/weights_max": 1.0281153917312622, + "objective/train/weights_min": 0.9532116651535034, + "theoretical_loss": 3.6465589935762104, + "tokens_seen": 1007616000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007017656500802568, + "loss": 1.6317, + "theoretical_loss": 3.646536454977205, + "tokens_seen": 1007681536 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007014446227929374, + "loss": 1.6185, + "theoretical_loss": 3.6461760923651294, + "tokens_seen": 1008730112 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.001510413596406579, + "objective/train/docs_used": 578450, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.64322829246521, + "objective/train/original_loss": 3.6432278156280518, + "objective/train/theoretical_loss": 3.645996090817232, + "objective/train/tokens_used": 1029714400, + "objective/train/value_avg": -0.053009033203125, + "objective/train/value_loss": 0.016285311430692673, + "objective/train/value_max": -0.000720977783203125, + "objective/train/value_min": -0.837890625, + "objective/train/value_reward_corr": 0.4935232327191909, + "objective/train/value_std": 0.0689697265625, + "objective/train/weight_avg": 0.9999294281005859, + "objective/train/weighted_lm_loss": 3.6452791690826416, + "objective/train/weights_max": 1.0674171447753906, + "objective/train/weights_min": 0.9110978841781616, + "theoretical_loss": 3.645996090817232, + "tokens_seen": 1009254400 + }, + { + "epoch": 0.31, + "learning_rate": 0.000701123595505618, + "loss": 1.6177, + "theoretical_loss": 3.645816208918901, + "tokens_seen": 1009778688 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007008025682182986, + "loss": 1.623, + "theoretical_loss": 3.6454568035048003, + "tokens_seen": 1010827264 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.006267358083277941, + "objective/train/docs_used": 579792, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2493350505828857, + "objective/train/original_loss": 3.249335289001465, + "objective/train/theoretical_loss": 3.645434356512767, + "objective/train/tokens_used": 1031352800, + "objective/train/value_avg": -0.01198577880859375, + "objective/train/value_loss": 0.0010294427629560232, + "objective/train/value_max": -0.000469207763671875, + "objective/train/value_min": -0.599609375, + "objective/train/value_reward_corr": 0.3614207509719149, + "objective/train/value_std": 0.01493072509765625, + "objective/train/weight_avg": 1.0006318092346191, + "objective/train/weighted_lm_loss": 3.251227378845215, + "objective/train/weights_max": 1.021959900856018, + "objective/train/weights_min": 0.9149268269538879, + "theoretical_loss": 3.645434356512767, + "tokens_seen": 1010892800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007004815409309791, + "loss": 1.6042, + "theoretical_loss": 3.645097874992961, + "tokens_seen": 1011875840 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.004718293901532888, + "objective/train/docs_used": 580179, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0488810539245605, + "objective/train/original_loss": 3.0488815307617188, + "objective/train/theoretical_loss": 3.644873786349497, + "objective/train/tokens_used": 1032991200, + "objective/train/value_avg": -0.01557159423828125, + "objective/train/value_loss": 0.0033434750512242317, + "objective/train/value_max": -0.0009255409240722656, + "objective/train/value_min": -0.876953125, + "objective/train/value_reward_corr": 0.34896727141226624, + "objective/train/value_std": 0.0204620361328125, + "objective/train/weight_avg": 1.0004881620407104, + "objective/train/weighted_lm_loss": 3.050386428833008, + "objective/train/weights_max": 1.0261589288711548, + "objective/train/weights_min": 0.9081581234931946, + "theoretical_loss": 3.644873786349497, + "tokens_seen": 1012531200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007001605136436597, + "loss": 1.6391, + "theoretical_loss": 3.6447394222573557, + "tokens_seen": 1012924416 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006998394863563404, + "loss": 1.6165, + "theoretical_loss": 3.644381444175778, + "tokens_seen": 1013972992 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.010201825760304928, + "objective/train/docs_used": 581599, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2738289833068848, + "objective/train/original_loss": 3.2738287448883057, + "objective/train/theoretical_loss": 3.6443143760369736, + "objective/train/tokens_used": 1034629600, + "objective/train/value_avg": -0.01812744140625, + "objective/train/value_loss": 0.0023600708227604628, + "objective/train/value_max": -0.0009398460388183594, + "objective/train/value_min": -0.358154296875, + "objective/train/value_reward_corr": 0.2856941360534917, + "objective/train/value_std": 0.0224761962890625, + "objective/train/weight_avg": 1.001031756401062, + "objective/train/weighted_lm_loss": 3.2767248153686523, + "objective/train/weights_max": 1.0270562171936035, + "objective/train/weights_min": 0.9187646508216858, + "theoretical_loss": 3.6443143760369736, + "tokens_seen": 1014169600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006995184590690209, + "loss": 1.598, + "theoretical_loss": 3.6440239396298244, + "tokens_seen": 1015021568 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.008158042095601559, + "objective/train/docs_used": 582330, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.02901291847229, + "objective/train/original_loss": 3.029012680053711, + "objective/train/theoretical_loss": 3.643756121307459, + "objective/train/tokens_used": 1036268000, + "objective/train/value_avg": -0.01207733154296875, + "objective/train/value_loss": 0.0010226324666291475, + "objective/train/value_max": -0.0008330345153808594, + "objective/train/value_min": -0.326904296875, + "objective/train/value_reward_corr": 0.35244658285885333, + "objective/train/value_std": 0.01422119140625, + "objective/train/weight_avg": 1.0008208751678467, + "objective/train/weighted_lm_loss": 3.0314414501190186, + "objective/train/weights_max": 1.0194053649902344, + "objective/train/weights_min": 0.9129219055175781, + "theoretical_loss": 3.643756121307459, + "tokens_seen": 1015808000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006991974317817015, + "loss": 1.5998, + "theoretical_loss": 3.643666907504879, + "tokens_seen": 1016070144 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006988764044943821, + "loss": 1.6097, + "theoretical_loss": 3.6433103466900962, + "tokens_seen": 1017118720 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0025365145411342382, + "objective/train/docs_used": 582938, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.2316882610321045, + "objective/train/original_loss": 2.2316880226135254, + "objective/train/theoretical_loss": 3.6431990179157694, + "objective/train/tokens_used": 1037906400, + "objective/train/value_avg": -0.014923095703125, + "objective/train/value_loss": 0.00045016678632237017, + "objective/train/value_max": -0.0006747245788574219, + "objective/train/value_min": -0.1815185546875, + "objective/train/value_reward_corr": 0.40700687358631393, + "objective/train/value_std": 0.01473236083984375, + "objective/train/weight_avg": 1.0002559423446655, + "objective/train/weighted_lm_loss": 2.23292875289917, + "objective/train/weights_max": 1.0182251930236816, + "objective/train/weights_min": 0.9807248115539551, + "theoretical_loss": 3.6431990179157694, + "tokens_seen": 1017446400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006985553772070627, + "loss": 1.6149, + "theoretical_loss": 3.6429542560783856, + "tokens_seen": 1018167296 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.008470248430967331, + "objective/train/docs_used": 584109, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.340682029724121, + "objective/train/original_loss": 3.340682029724121, + "objective/train/theoretical_loss": 3.642643061639121, + "objective/train/tokens_used": 1039544800, + "objective/train/value_avg": -0.0229644775390625, + "objective/train/value_loss": 0.004365065135061741, + "objective/train/value_max": -0.000904083251953125, + "objective/train/value_min": -0.94775390625, + "objective/train/value_reward_corr": 0.3471599147815712, + "objective/train/value_std": 0.033355712890625, + "objective/train/weight_avg": 1.0008684396743774, + "objective/train/weighted_lm_loss": 3.3431031703948975, + "objective/train/weights_max": 1.0755208730697632, + "objective/train/weights_min": 0.908639132976532, + "theoretical_loss": 3.642643061639121, + "tokens_seen": 1019084800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006982343499197432, + "loss": 1.5721, + "theoretical_loss": 3.6425986345663914, + "tokens_seen": 1019215872 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006979133226324238, + "loss": 1.5957, + "theoretical_loss": 3.6422434810544813, + "tokens_seen": 1020264448 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.002981671364977956, + "objective/train/docs_used": 584789, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4687442779541016, + "objective/train/original_loss": 3.4687440395355225, + "objective/train/theoretical_loss": 3.642088248276974, + "objective/train/tokens_used": 1041183200, + "objective/train/value_avg": -0.028717041015625, + "objective/train/value_loss": 0.01703738421201706, + "objective/train/value_max": -0.0005702972412109375, + "objective/train/value_min": -0.9921875, + "objective/train/value_reward_corr": 0.43836657442785054, + "objective/train/value_std": 0.07470703125, + "objective/train/weight_avg": 0.9997853636741638, + "objective/train/weighted_lm_loss": 3.4663658142089844, + "objective/train/weights_max": 1.0968964099884033, + "objective/train/weights_min": 0.9061781764030457, + "theoretical_loss": 3.642088248276974, + "tokens_seen": 1020723200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006975922953451044, + "loss": 1.5751, + "theoretical_loss": 3.641888794446725, + "tokens_seen": 1021313024 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.0009242170490324497, + "objective/train/docs_used": 586155, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2924318313598633, + "objective/train/original_loss": 3.2924318313598633, + "objective/train/theoretical_loss": 3.6415345736508824, + "objective/train/tokens_used": 1042821600, + "objective/train/value_avg": -0.0203399658203125, + "objective/train/value_loss": 0.008307679556310177, + "objective/train/value_max": -0.0005679130554199219, + "objective/train/value_min": -0.9609375, + "objective/train/value_reward_corr": 0.554307501717763, + "objective/train/value_std": 0.058868408203125, + "objective/train/weight_avg": 0.9999483823776245, + "objective/train/weighted_lm_loss": 3.291699171066284, + "objective/train/weights_max": 1.0669565200805664, + "objective/train/weights_min": 0.9071041345596313, + "theoretical_loss": 3.6415345736508824, + "tokens_seen": 1022361600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006972712680577849, + "loss": 1.5993, + "theoretical_loss": 3.6415345736508824, + "tokens_seen": 1022361600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006969502407704655, + "loss": 1.5774, + "theoretical_loss": 3.6411808175783844, + "tokens_seen": 1023410176 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.0024215097073465586, + "objective/train/docs_used": 586672, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3133046627044678, + "objective/train/original_loss": 3.313305377960205, + "objective/train/theoretical_loss": 3.6409820336043413, + "objective/train/tokens_used": 1044460000, + "objective/train/value_avg": -0.033172607421875, + "objective/train/value_loss": 0.015489381738007069, + "objective/train/value_max": -0.00075531005859375, + "objective/train/value_min": -0.9755859375, + "objective/train/value_reward_corr": 0.5477556433593624, + "objective/train/value_std": 0.0745849609375, + "objective/train/weight_avg": 0.9998340606689453, + "objective/train/weighted_lm_loss": 3.3135337829589844, + "objective/train/weights_max": 1.0767897367477417, + "objective/train/weights_min": 0.9090582728385925, + "theoretical_loss": 3.6409820336043413, + "tokens_seen": 1024000000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006966292134831461, + "loss": 1.5525, + "theoretical_loss": 3.640827525144318, + "tokens_seen": 1024458752 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006963081861958267, + "loss": 1.5978, + "theoretical_loss": 3.64047469526741, + "tokens_seen": 1025507328 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.006503461394459009, + "objective/train/docs_used": 587795, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9637703895568848, + "objective/train/original_loss": 2.9637703895568848, + "objective/train/theoretical_loss": 3.6404306240026356, + "objective/train/tokens_used": 1046098400, + "objective/train/value_avg": -0.0211334228515625, + "objective/train/value_loss": 0.005146055482327938, + "objective/train/value_max": -0.0010366439819335938, + "objective/train/value_min": -0.7216796875, + "objective/train/value_reward_corr": 0.2676313246931955, + "objective/train/value_std": 0.0312042236328125, + "objective/train/weight_avg": 1.0006756782531738, + "objective/train/weighted_lm_loss": 2.965015172958374, + "objective/train/weights_max": 1.0610138177871704, + "objective/train/weights_min": 0.9143593907356262, + "theoretical_loss": 3.6404306240026356, + "tokens_seen": 1025638400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006959871589085072, + "loss": 1.5924, + "theoretical_loss": 3.640122326870012, + "tokens_seen": 1026555904 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.00029594183433800936, + "objective/train/docs_used": 588595, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.1917881965637207, + "objective/train/original_loss": 2.1917881965637207, + "objective/train/theoretical_loss": 3.6398803407326934, + "objective/train/tokens_used": 1047736800, + "objective/train/value_avg": -0.0340576171875, + "objective/train/value_loss": 0.014400861226022243, + "objective/train/value_max": -0.0009436607360839844, + "objective/train/value_min": -0.9326171875, + "objective/train/value_reward_corr": 0.4435257577059666, + "objective/train/value_std": 0.07769775390625, + "objective/train/weight_avg": 1.0000412464141846, + "objective/train/weighted_lm_loss": 2.1891121864318848, + "objective/train/weights_max": 1.0745481252670288, + "objective/train/weights_min": 0.9083473682403564, + "theoretical_loss": 3.6398803407326934, + "tokens_seen": 1027276800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006956661316211879, + "loss": 1.5988, + "theoretical_loss": 3.639770418878081, + "tokens_seen": 1027604480 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006953451043338685, + "loss": 1.5646, + "theoretical_loss": 3.6394189702211706, + "tokens_seen": 1028653056 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.002887995447963476, + "objective/train/docs_used": 589927, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.741786479949951, + "objective/train/original_loss": 2.7417867183685303, + "objective/train/theoretical_loss": 3.6393311797029373, + "objective/train/tokens_used": 1049375200, + "objective/train/value_avg": -0.01702880859375, + "objective/train/value_loss": 0.004898994229733944, + "objective/train/value_max": -0.0005679130554199219, + "objective/train/value_min": -0.90625, + "objective/train/value_reward_corr": 0.5359447526114984, + "objective/train/value_std": 0.0252532958984375, + "objective/train/weight_avg": 0.9997351765632629, + "objective/train/weighted_lm_loss": 2.7412338256835938, + "objective/train/weights_max": 1.0299830436706543, + "objective/train/weights_min": 0.9079298973083496, + "theoretical_loss": 3.6393311797029373, + "tokens_seen": 1028915200 + }, + { + "epoch": 0.31, + "learning_rate": 0.000695024077046549, + "loss": 1.5955, + "theoretical_loss": 3.639067979832408, + "tokens_seen": 1029701632 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.004665323533117771, + "objective/train/docs_used": 590720, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.370556354522705, + "objective/train/original_loss": 3.370556116104126, + "objective/train/theoretical_loss": 3.638783136843138, + "objective/train/tokens_used": 1051013600, + "objective/train/value_avg": -0.01995849609375, + "objective/train/value_loss": 0.0030596465803682804, + "objective/train/value_max": -0.0006361007690429688, + "objective/train/value_min": -0.66943359375, + "objective/train/value_reward_corr": 0.4554733065689616, + "objective/train/value_std": 0.02789306640625, + "objective/train/weight_avg": 1.0004816055297852, + "objective/train/weighted_lm_loss": 3.372448444366455, + "objective/train/weights_max": 1.0225181579589844, + "objective/train/weights_min": 0.9108127951622009, + "theoretical_loss": 3.638783136843138, + "tokens_seen": 1030553600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006947030497592296, + "loss": 1.5871, + "theoretical_loss": 3.6387174466484824, + "tokens_seen": 1030750208 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006943820224719102, + "loss": 1.5967, + "theoretical_loss": 3.6383673696096297, + "tokens_seen": 1031798784 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.004457978997379541, + "objective/train/docs_used": 591991, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4116344451904297, + "objective/train/original_loss": 3.4116339683532715, + "objective/train/theoretical_loss": 3.63823620810427, + "objective/train/tokens_used": 1052652000, + "objective/train/value_avg": -0.017120361328125, + "objective/train/value_loss": 0.0035874268505722284, + "objective/train/value_max": -0.0008296966552734375, + "objective/train/value_min": -0.460205078125, + "objective/train/value_reward_corr": 0.2098625703322276, + "objective/train/value_std": 0.0254669189453125, + "objective/train/weight_avg": 1.000463604927063, + "objective/train/weighted_lm_loss": 3.4142560958862305, + "objective/train/weights_max": 1.0441280603408813, + "objective/train/weights_min": 0.9126567840576172, + "theoretical_loss": 3.63823620810427, + "tokens_seen": 1032192000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006940609951845908, + "loss": 1.5911, + "theoretical_loss": 3.638017747659614, + "tokens_seen": 1032847360 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.007325907703489065, + "objective/train/docs_used": 592462, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.716585874557495, + "objective/train/original_loss": 3.716585636138916, + "objective/train/theoretical_loss": 3.637690389458365, + "objective/train/tokens_used": 1054290400, + "objective/train/value_avg": -0.01300048828125, + "objective/train/value_loss": 0.0014244684716686606, + "objective/train/value_max": -0.0005908012390136719, + "objective/train/value_min": -0.26806640625, + "objective/train/value_reward_corr": 0.1603102772835162, + "objective/train/value_std": 0.0137786865234375, + "objective/train/weight_avg": 1.000739574432373, + "objective/train/weighted_lm_loss": 3.719301700592041, + "objective/train/weights_max": 1.0258445739746094, + "objective/train/weights_min": 0.9123022556304932, + "theoretical_loss": 3.637690389458365, + "tokens_seen": 1033830400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006937399678972712, + "loss": 1.6151, + "theoretical_loss": 3.637668579745716, + "tokens_seen": 1033895936 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006934189406099518, + "loss": 1.614, + "theoretical_loss": 3.637319864818716, + "tokens_seen": 1034944512 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0016251251799985766, + "objective/train/docs_used": 593828, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.044060468673706, + "objective/train/original_loss": 3.044060707092285, + "objective/train/theoretical_loss": 3.637145676898374, + "objective/train/tokens_used": 1055928800, + "objective/train/value_avg": -0.02777099609375, + "objective/train/value_loss": 0.004856533370912075, + "objective/train/value_max": -0.0012493133544921875, + "objective/train/value_min": -0.8955078125, + "objective/train/value_reward_corr": 0.5857612324685174, + "objective/train/value_std": 0.0545654296875, + "objective/train/weight_avg": 1.000186562538147, + "objective/train/weighted_lm_loss": 3.045487642288208, + "objective/train/weights_max": 1.0416079759597778, + "objective/train/weights_min": 0.911716639995575, + "theoretical_loss": 3.637145676898374, + "tokens_seen": 1035468800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006930979133226324, + "loss": 1.5981, + "theoretical_loss": 3.6369716018328777, + "tokens_seen": 1035993088 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006927768860353129, + "loss": 1.6425, + "theoretical_loss": 3.6366237897459355, + "tokens_seen": 1037041664 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.006275584455579519, + "objective/train/docs_used": 594513, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2921483516693115, + "objective/train/original_loss": 3.2921483516693115, + "objective/train/theoretical_loss": 3.636602066438022, + "objective/train/tokens_used": 1057567200, + "objective/train/value_avg": -0.01180267333984375, + "objective/train/value_loss": 0.0005701882182620466, + "objective/train/value_max": -0.000743865966796875, + "objective/train/value_min": -0.135009765625, + "objective/train/value_reward_corr": 0.17889907406432226, + "objective/train/value_std": 0.01013946533203125, + "objective/train/weight_avg": 1.000630497932434, + "objective/train/weighted_lm_loss": 3.2942168712615967, + "objective/train/weights_max": 1.0134514570236206, + "objective/train/weights_min": 0.9576974511146545, + "theoretical_loss": 3.636602066438022, + "tokens_seen": 1037107200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006924558587479935, + "loss": 1.624, + "theoretical_loss": 3.6362764275190766, + "tokens_seen": 1038090240 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.008635456673800945, + "objective/train/docs_used": 595196, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.575434446334839, + "objective/train/original_loss": 3.5754339694976807, + "objective/train/theoretical_loss": 3.636059554111668, + "objective/train/tokens_used": 1059205600, + "objective/train/value_avg": -0.01314544677734375, + "objective/train/value_loss": 0.007354561239480972, + "objective/train/value_max": -0.00047659873962402344, + "objective/train/value_min": -0.55126953125, + "objective/train/value_reward_corr": 0.5484643153684323, + "objective/train/value_std": 0.01454925537109375, + "objective/train/weight_avg": 0.9991726279258728, + "objective/train/weighted_lm_loss": 3.571971893310547, + "objective/train/weights_max": 1.0218583345413208, + "objective/train/weights_min": 0.9063756465911865, + "theoretical_loss": 3.636059554111668, + "tokens_seen": 1038745600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006921348314606741, + "loss": 1.5811, + "theoretical_loss": 3.6359295141169303, + "tokens_seen": 1039138816 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006918138041733547, + "loss": 1.6092, + "theoretical_loss": 3.6355830485075473, + "tokens_seen": 1040187392 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.005722053349018097, + "objective/train/docs_used": 596146, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3662192821502686, + "objective/train/original_loss": 3.3662195205688477, + "objective/train/theoretical_loss": 3.6355181359741673, + "objective/train/tokens_used": 1060844000, + "objective/train/value_avg": -0.0163726806640625, + "objective/train/value_loss": 0.00045832470641471446, + "objective/train/value_max": -0.0007762908935546875, + "objective/train/value_min": -0.16357421875, + "objective/train/value_reward_corr": 0.5152312416187949, + "objective/train/value_std": 0.015716552734375, + "objective/train/weight_avg": 1.0005745887756348, + "objective/train/weighted_lm_loss": 3.36834454536438, + "objective/train/weights_max": 1.0110846757888794, + "objective/train/weights_min": 0.9638657569885254, + "theoretical_loss": 3.6355181359741673, + "tokens_seen": 1040384000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006914927768860353, + "loss": 1.5903, + "theoretical_loss": 3.635237029662391, + "tokens_seen": 1041235968 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.009236250072717667, + "objective/train/docs_used": 596810, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5615527629852295, + "objective/train/original_loss": 2.5615527629852295, + "objective/train/theoretical_loss": 3.6349778081007327, + "objective/train/tokens_used": 1062482400, + "objective/train/value_avg": -0.0176544189453125, + "objective/train/value_loss": 0.0016186271095648408, + "objective/train/value_max": -0.0004494190216064453, + "objective/train/value_min": -0.705078125, + "objective/train/value_reward_corr": 0.2147351960070332, + "objective/train/value_std": 0.033203125, + "objective/train/weight_avg": 1.000931739807129, + "objective/train/weighted_lm_loss": 2.563370704650879, + "objective/train/weights_max": 1.0672663450241089, + "objective/train/weights_min": 0.9368653297424316, + "theoretical_loss": 3.6349778081007327, + "tokens_seen": 1042022400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006911717495987159, + "loss": 1.5626, + "theoretical_loss": 3.6348914565563186, + "tokens_seen": 1042284544 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006908507223113965, + "loss": 1.6015, + "theoretical_loss": 3.6345463281675676, + "tokens_seen": 1043333120 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.00451819458976388, + "objective/train/docs_used": 598197, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1886656284332275, + "objective/train/original_loss": 3.1886656284332275, + "objective/train/theoretical_loss": 3.6344385665867973, + "objective/train/tokens_used": 1064120800, + "objective/train/value_avg": -0.01178741455078125, + "objective/train/value_loss": 0.0018949242075905204, + "objective/train/value_max": -0.0005769729614257812, + "objective/train/value_min": -0.317626953125, + "objective/train/value_reward_corr": 0.2669181376440587, + "objective/train/value_std": 0.01256561279296875, + "objective/train/weight_avg": 1.000461220741272, + "objective/train/weighted_lm_loss": 3.1898245811462402, + "objective/train/weights_max": 1.0173473358154297, + "objective/train/weights_min": 0.9255688786506653, + "theoretical_loss": 3.6344385665867973, + "tokens_seen": 1043660800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006905296950240771, + "loss": 1.6221, + "theoretical_loss": 3.6342016434777427, + "tokens_seen": 1044381696 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.004803313408046961, + "objective/train/docs_used": 598880, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1082122325897217, + "objective/train/original_loss": 3.1082122325897217, + "objective/train/theoretical_loss": 3.6339004075478796, + "objective/train/tokens_used": 1065759200, + "objective/train/value_avg": -0.01873779296875, + "objective/train/value_loss": 0.004636182449758053, + "objective/train/value_max": -0.0004372596740722656, + "objective/train/value_min": -0.794921875, + "objective/train/value_reward_corr": 0.7035994909971108, + "objective/train/value_std": 0.0423583984375, + "objective/train/weight_avg": 1.0005030632019043, + "objective/train/weighted_lm_loss": 3.109774589538574, + "objective/train/weights_max": 1.035140872001648, + "objective/train/weights_min": 0.9104219675064087, + "theoretical_loss": 3.6339004075478796, + "tokens_seen": 1045299200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006902086677367576, + "loss": 1.6187, + "theoretical_loss": 3.6338574014717997, + "tokens_seen": 1045430272 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006898876404494382, + "loss": 1.6323, + "theoretical_loss": 3.6335136011380307, + "tokens_seen": 1046478848 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.006524696480482817, + "objective/train/docs_used": 599993, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0653693675994873, + "objective/train/original_loss": 3.065369129180908, + "objective/train/theoretical_loss": 3.633363327119448, + "objective/train/tokens_used": 1067397600, + "objective/train/value_avg": -0.010498046875, + "objective/train/value_loss": 0.0004715508548542857, + "objective/train/value_max": -0.0006117820739746094, + "objective/train/value_min": -0.183349609375, + "objective/train/value_reward_corr": 0.35260084997503377, + "objective/train/value_std": 0.01038360595703125, + "objective/train/weight_avg": 1.000654697418213, + "objective/train/weighted_lm_loss": 3.0672364234924316, + "objective/train/weights_max": 1.011035442352295, + "objective/train/weights_min": 0.9492418766021729, + "theoretical_loss": 3.633363327119448, + "tokens_seen": 1046937600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006895666131621188, + "loss": 1.5857, + "theoretical_loss": 3.6331702414680525, + "tokens_seen": 1047527424 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": -0.0024159548338502645, + "objective/train/docs_used": 601088, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3895926475524902, + "objective/train/original_loss": 3.3895926475524902, + "objective/train/theoretical_loss": 3.632827321456789, + "objective/train/tokens_used": 1069036000, + "objective/train/value_avg": -0.0235595703125, + "objective/train/value_loss": 0.006200904957950115, + "objective/train/value_max": -0.0006361007690429688, + "objective/train/value_min": -0.8603515625, + "objective/train/value_reward_corr": 0.34893983110639015, + "objective/train/value_std": 0.0289459228515625, + "objective/train/weight_avg": 0.9997889399528503, + "objective/train/weighted_lm_loss": 3.3901009559631348, + "objective/train/weights_max": 1.0489840507507324, + "objective/train/weights_min": 0.9119029641151428, + "theoretical_loss": 3.632827321456789, + "tokens_seen": 1048576000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006892455858747993, + "loss": 1.5865, + "theoretical_loss": 3.632827321456789, + "tokens_seen": 1048576000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006889245585874799, + "loss": 1.605, + "theoretical_loss": 3.6324848401024594, + "tokens_seen": 1049624576 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": -0.0017417611088603735, + "objective/train/docs_used": 601638, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3971664905548096, + "objective/train/original_loss": 3.3971660137176514, + "objective/train/theoretical_loss": 3.632292386734872, + "objective/train/tokens_used": 1070674400, + "objective/train/value_avg": -0.01438140869140625, + "objective/train/value_loss": 0.006895124912261963, + "objective/train/value_max": -0.0006566047668457031, + "objective/train/value_min": -0.96875, + "objective/train/value_reward_corr": 0.3442893452795004, + "objective/train/value_std": 0.027801513671875, + "objective/train/weight_avg": 0.999859631061554, + "objective/train/weighted_lm_loss": 3.395590305328369, + "objective/train/weights_max": 1.0575478076934814, + "objective/train/weights_min": 0.9084599018096924, + "theoretical_loss": 3.632292386734872, + "tokens_seen": 1050214400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006886035313001605, + "loss": 1.6131, + "theoretical_loss": 3.632142796406564, + "tokens_seen": 1050673152 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006882825040128411, + "loss": 1.5989, + "theoretical_loss": 3.631801189373867, + "tokens_seen": 1051721728 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.008846897631883621, + "objective/train/docs_used": 602388, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.371502161026001, + "objective/train/original_loss": 3.371502161026001, + "objective/train/theoretical_loss": 3.631758519148221, + "objective/train/tokens_used": 1072312800, + "objective/train/value_avg": -0.0199127197265625, + "objective/train/value_loss": 0.0036496836692094803, + "objective/train/value_max": -0.0006117820739746094, + "objective/train/value_min": -0.73828125, + "objective/train/value_reward_corr": 0.5449256638068979, + "objective/train/value_std": 0.0322265625, + "objective/train/weight_avg": 1.0009026527404785, + "objective/train/weighted_lm_loss": 3.375081777572632, + "objective/train/weights_max": 1.02909255027771, + "objective/train/weights_min": 0.9112845063209534, + "theoretical_loss": 3.631758519148221, + "tokens_seen": 1051852800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006879614767255216, + "loss": 1.6233, + "theoretical_loss": 3.631460018012389, + "tokens_seen": 1052770304 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0037469128146767616, + "objective/train/docs_used": 603451, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.73030161857605, + "objective/train/original_loss": 3.7303013801574707, + "objective/train/theoretical_loss": 3.6312257149107814, + "objective/train/tokens_used": 1073951200, + "objective/train/value_avg": -0.01898193359375, + "objective/train/value_loss": 0.003405196126550436, + "objective/train/value_max": -0.0004546642303466797, + "objective/train/value_min": -0.7294921875, + "objective/train/value_reward_corr": 0.5522208318009975, + "objective/train/value_std": 0.04052734375, + "objective/train/weight_avg": 1.0003916025161743, + "objective/train/weighted_lm_loss": 3.7312653064727783, + "objective/train/weights_max": 1.0577646493911743, + "objective/train/weights_min": 0.9346166849136353, + "theoretical_loss": 3.6312257149107814, + "tokens_seen": 1053491200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006876404494382023, + "loss": 1.579, + "theoretical_loss": 3.631119281333386, + "tokens_seen": 1053818880 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006873194221508829, + "loss": 1.5789, + "theoretical_loss": 3.6307789783513402, + "tokens_seen": 1054867456 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0027461457066237926, + "objective/train/docs_used": 603451, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2004551887512207, + "objective/train/original_loss": 3.2004544734954834, + "objective/train/theoretical_loss": 3.630693970255794, + "objective/train/tokens_used": 1075589600, + "objective/train/value_avg": -0.013885498046875, + "objective/train/value_loss": 0.00464292848482728, + "objective/train/value_max": -0.000820159912109375, + "objective/train/value_min": -0.36474609375, + "objective/train/value_reward_corr": 0.18313171868230843, + "objective/train/value_std": 0.0142974853515625, + "objective/train/weight_avg": 1.0002973079681396, + "objective/train/weighted_lm_loss": 3.2008354663848877, + "objective/train/weights_max": 1.0226854085922241, + "objective/train/weights_min": 0.9079737663269043, + "theoretical_loss": 3.630693970255794, + "tokens_seen": 1055129600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006869983948635634, + "loss": 1.6455, + "theoretical_loss": 3.6304391080839453, + "tokens_seen": 1055916032 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.00974293053150177, + "objective/train/docs_used": 603451, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.474087715148926, + "objective/train/original_loss": 3.4740874767303467, + "objective/train/theoretical_loss": 3.6301632814356637, + "objective/train/tokens_used": 1077228000, + "objective/train/value_avg": -0.019989013671875, + "objective/train/value_loss": 0.0024389689788222313, + "objective/train/value_max": -0.000820159912109375, + "objective/train/value_min": -0.53759765625, + "objective/train/value_reward_corr": 0.28630494567086257, + "objective/train/value_std": 0.0247955322265625, + "objective/train/weight_avg": 1.0009863376617432, + "objective/train/weighted_lm_loss": 3.4770820140838623, + "objective/train/weights_max": 1.043904185295105, + "objective/train/weights_min": 0.934797465801239, + "theoretical_loss": 3.6301632814356637, + "tokens_seen": 1056768000 + }, + { + "epoch": 0.32, + "learning_rate": 0.000686677367576244, + "loss": 1.6627, + "theoretical_loss": 3.630099669552091, + "tokens_seen": 1056964608 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006863563402889246, + "loss": 1.6242, + "theoretical_loss": 3.6297606617798532, + "tokens_seen": 1058013184 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.00897289626300335, + "objective/train/docs_used": 604503, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4013986587524414, + "objective/train/original_loss": 3.401398181915283, + "objective/train/theoretical_loss": 3.629633644721836, + "objective/train/tokens_used": 1078866400, + "objective/train/value_avg": -0.01654052734375, + "objective/train/value_loss": 0.000980466022156179, + "objective/train/value_max": -0.0005273818969726562, + "objective/train/value_min": -0.79638671875, + "objective/train/value_reward_corr": 0.39026293437830356, + "objective/train/value_std": 0.0225372314453125, + "objective/train/weight_avg": 1.0009021759033203, + "objective/train/weighted_lm_loss": 3.404343843460083, + "objective/train/weights_max": 1.0314016342163086, + "objective/train/weights_min": 0.9545326232910156, + "theoretical_loss": 3.629633644721836, + "tokens_seen": 1058406400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006860353130016052, + "loss": 1.6249, + "theoretical_loss": 3.629422083794477, + "tokens_seen": 1059061760 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": -0.0019313275115564466, + "objective/train/docs_used": 605021, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5869603157043457, + "objective/train/original_loss": 3.5869603157043457, + "objective/train/theoretical_loss": 3.6291050564046676, + "objective/train/tokens_used": 1080504800, + "objective/train/value_avg": -0.0157470703125, + "objective/train/value_loss": 0.006440610159188509, + "objective/train/value_max": -0.0003800392150878906, + "objective/train/value_min": -0.861328125, + "objective/train/value_reward_corr": 0.338264404769824, + "objective/train/value_std": 0.02447509765625, + "objective/train/weight_avg": 0.9998384118080139, + "objective/train/weighted_lm_loss": 3.5850706100463867, + "objective/train/weights_max": 1.0448921918869019, + "objective/train/weights_min": 0.9072267413139343, + "theoretical_loss": 3.6291050564046676, + "tokens_seen": 1060044800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006857142857142857, + "loss": 1.677, + "theoretical_loss": 3.6290839346263644, + "tokens_seen": 1060110336 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006853932584269663, + "loss": 1.6888, + "theoretical_loss": 3.6287462133090616, + "tokens_seen": 1061158912 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": -0.01948131062090397, + "objective/train/docs_used": 606310, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1577811241149902, + "objective/train/original_loss": 3.1577811241149902, + "objective/train/theoretical_loss": 3.628577512793303, + "objective/train/tokens_used": 1082143200, + "objective/train/value_avg": -0.01175689697265625, + "objective/train/value_loss": 0.009648483246564865, + "objective/train/value_max": -0.0006070137023925781, + "objective/train/value_min": -0.39990234375, + "objective/train/value_reward_corr": 0.5243982047195441, + "objective/train/value_std": 0.011688232421875, + "objective/train/weight_avg": 0.9980995059013367, + "objective/train/weighted_lm_loss": 3.149221420288086, + "objective/train/weights_max": 1.0112687349319458, + "objective/train/weights_min": 0.9263339042663574, + "theoretical_loss": 3.628577512793303, + "tokens_seen": 1061683200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006850722311396469, + "loss": 1.646, + "theoretical_loss": 3.6284089188792445, + "tokens_seen": 1062207488 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006847512038523274, + "loss": 1.6922, + "theoretical_loss": 3.6280720503767077, + "tokens_seen": 1063256064 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.007335764355957508, + "objective/train/docs_used": 606853, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.269808053970337, + "objective/train/original_loss": 3.269807815551758, + "objective/train/theoretical_loss": 3.628051010215551, + "objective/train/tokens_used": 1083781600, + "objective/train/value_avg": -0.01332855224609375, + "objective/train/value_loss": 0.0018821690464392304, + "objective/train/value_max": -0.00040459632873535156, + "objective/train/value_min": -0.1842041015625, + "objective/train/value_reward_corr": 0.06097960190787336, + "objective/train/value_std": 0.0164642333984375, + "objective/train/weight_avg": 1.000742793083191, + "objective/train/weighted_lm_loss": 3.2701690196990967, + "objective/train/weights_max": 1.0183043479919434, + "objective/train/weights_min": 0.9145936965942383, + "theoretical_loss": 3.628051010215551, + "tokens_seen": 1063321600 + }, + { + "epoch": 0.32, + "learning_rate": 0.000684430176565008, + "loss": 1.7311, + "theoretical_loss": 3.627735606844347, + "tokens_seen": 1064304640 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.004703077021986246, + "objective/train/docs_used": 607789, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.525740385055542, + "objective/train/original_loss": 3.525740146636963, + "objective/train/theoretical_loss": 3.62752554501776, + "objective/train/tokens_used": 1085420000, + "objective/train/value_avg": -0.033477783203125, + "objective/train/value_loss": 0.005138933192938566, + "objective/train/value_max": -0.0009288787841796875, + "objective/train/value_min": -0.61083984375, + "objective/train/value_reward_corr": 0.33564619976982707, + "objective/train/value_std": 0.046966552734375, + "objective/train/weight_avg": 1.0004957914352417, + "objective/train/weighted_lm_loss": 3.5262680053710938, + "objective/train/weights_max": 1.041597843170166, + "objective/train/weights_min": 0.9153583645820618, + "theoretical_loss": 3.62752554501776, + "tokens_seen": 1064960000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006841091492776886, + "loss": 1.7408, + "theoretical_loss": 3.627399587328153, + "tokens_seen": 1065353216 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006837881219903693, + "loss": 1.6841, + "theoretical_loss": 3.6270639908771907, + "tokens_seen": 1066401792 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.01057093869894743, + "objective/train/docs_used": 608475, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6855156421661377, + "objective/train/original_loss": 3.6855154037475586, + "objective/train/theoretical_loss": 3.627001113564699, + "objective/train/tokens_used": 1087058400, + "objective/train/value_avg": -0.0187225341796875, + "objective/train/value_loss": 0.003036881797015667, + "objective/train/value_max": -0.0009002685546875, + "objective/train/value_min": -0.521484375, + "objective/train/value_reward_corr": 0.29613310197140086, + "objective/train/value_std": 0.0204925537109375, + "objective/train/weight_avg": 1.0010720491409302, + "objective/train/weighted_lm_loss": 3.689093828201294, + "objective/train/weights_max": 1.0287871360778809, + "objective/train/weights_min": 0.910256564617157, + "theoretical_loss": 3.627001113564699, + "tokens_seen": 1066598400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006834670947030498, + "loss": 1.7238, + "theoretical_loss": 3.6267288165435922, + "tokens_seen": 1067450368 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": -0.0024937393609434366, + "objective/train/docs_used": 609657, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.426074266433716, + "objective/train/original_loss": 3.426074266433716, + "objective/train/theoretical_loss": 3.6264777122394327, + "objective/train/tokens_used": 1088696800, + "objective/train/value_avg": -0.01306915283203125, + "objective/train/value_loss": 0.0020958741661161184, + "objective/train/value_max": -0.0007014274597167969, + "objective/train/value_min": -0.1512451171875, + "objective/train/value_reward_corr": 0.34358284534431305, + "objective/train/value_std": 0.0103912353515625, + "objective/train/weight_avg": 0.9997609853744507, + "objective/train/weighted_lm_loss": 3.4255566596984863, + "objective/train/weights_max": 1.0128775835037231, + "objective/train/weights_min": 0.9131885766983032, + "theoretical_loss": 3.6264777122394327, + "tokens_seen": 1068236800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006831460674157304, + "loss": 1.727, + "theoretical_loss": 3.626394063382541, + "tokens_seen": 1068498944 + }, + { + "epoch": 0.32, + "learning_rate": 0.000682825040128411, + "loss": 1.6895, + "theoretical_loss": 3.62605973045226, + "tokens_seen": 1069547520 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0010751986410468817, + "objective/train/docs_used": 610408, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3744561672210693, + "objective/train/original_loss": 3.3744564056396484, + "objective/train/theoretical_loss": 3.625955337443205, + "objective/train/tokens_used": 1090335200, + "objective/train/value_avg": -0.0183563232421875, + "objective/train/value_loss": 0.00721268355846405, + "objective/train/value_max": -0.0008492469787597656, + "objective/train/value_min": -0.92431640625, + "objective/train/value_reward_corr": 0.4856098503218703, + "objective/train/value_std": 0.036773681640625, + "objective/train/weight_avg": 1.0001428127288818, + "objective/train/weighted_lm_loss": 3.3741185665130615, + "objective/train/weights_max": 1.0523878335952759, + "objective/train/weights_min": 0.9093905091285706, + "theoretical_loss": 3.625955337443205, + "tokens_seen": 1069875200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006825040128410916, + "loss": 1.6443, + "theoretical_loss": 3.6257258168139987, + "tokens_seen": 1070596096 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": -0.002029024064540863, + "objective/train/docs_used": 611059, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6497223377227783, + "objective/train/original_loss": 3.6497228145599365, + "objective/train/theoretical_loss": 3.6254339855953184, + "objective/train/tokens_used": 1091973600, + "objective/train/value_avg": -0.0230712890625, + "objective/train/value_loss": 0.007844051346182823, + "objective/train/value_max": -0.0006513595581054688, + "objective/train/value_min": -0.8916015625, + "objective/train/value_reward_corr": 0.3218746481464941, + "objective/train/value_std": 0.026123046875, + "objective/train/weight_avg": 0.9998356699943542, + "objective/train/weighted_lm_loss": 3.648761034011841, + "objective/train/weights_max": 1.0439386367797852, + "objective/train/weights_min": 0.9070360064506531, + "theoretical_loss": 3.6254339855953184, + "tokens_seen": 1071513600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006821829855537721, + "loss": 1.6603, + "theoretical_loss": 3.625392321532021, + "tokens_seen": 1071644672 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006818619582664527, + "loss": 1.6457, + "theoretical_loss": 3.6250592436735904, + "tokens_seen": 1072693248 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.010480794124305248, + "objective/train/docs_used": 612309, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9766147136688232, + "objective/train/original_loss": 2.9766149520874023, + "objective/train/theoretical_loss": 3.624913653133015, + "objective/train/tokens_used": 1093612000, + "objective/train/value_avg": -0.024810791015625, + "objective/train/value_loss": 0.00797299388796091, + "objective/train/value_max": -0.0007319450378417969, + "objective/train/value_min": -0.89501953125, + "objective/train/value_reward_corr": 0.6275132696263477, + "objective/train/value_std": 0.0531005859375, + "objective/train/weight_avg": 0.9989911913871765, + "objective/train/weighted_lm_loss": 2.9760351181030273, + "objective/train/weights_max": 1.0507762432098389, + "objective/train/weights_min": 0.9114724397659302, + "theoretical_loss": 3.624913653133015, + "tokens_seen": 1073152000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006815409309791333, + "loss": 1.6682, + "theoretical_loss": 3.624726582308961, + "tokens_seen": 1073741824 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0003245442931074649, + "objective/train/docs_used": 613082, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0142388343811035, + "objective/train/original_loss": 3.0142385959625244, + "objective/train/theoretical_loss": 3.624394336511362, + "objective/train/tokens_used": 1095250400, + "objective/train/value_avg": -0.0423583984375, + "objective/train/value_loss": 0.007258824538439512, + "objective/train/value_max": -0.0007410049438476562, + "objective/train/value_min": -0.9716796875, + "objective/train/value_reward_corr": 0.6795142382215954, + "objective/train/value_std": 0.08013916015625, + "objective/train/weight_avg": 1.0000685453414917, + "objective/train/weighted_lm_loss": 3.015281915664673, + "objective/train/weights_max": 1.0730187892913818, + "objective/train/weights_min": 0.9116520285606384, + "theoretical_loss": 3.624394336511362, + "tokens_seen": 1074790400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006812199036918138, + "loss": 1.6524, + "theoretical_loss": 3.624394336511362, + "tokens_seen": 1074790400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006808988764044944, + "loss": 1.6694, + "theoretical_loss": 3.6240625053569873, + "tokens_seen": 1075838976 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0057056983932852745, + "objective/train/docs_used": 614240, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4152190685272217, + "objective/train/original_loss": 3.4152188301086426, + "objective/train/theoretical_loss": 3.6238760322031336, + "objective/train/tokens_used": 1096888800, + "objective/train/value_avg": -0.0296783447265625, + "objective/train/value_loss": 0.008320309221744537, + "objective/train/value_max": -0.0007467269897460938, + "objective/train/value_min": -0.8359375, + "objective/train/value_reward_corr": 0.6692361330918932, + "objective/train/value_std": 0.05865478515625, + "objective/train/weight_avg": 0.9994702935218811, + "objective/train/weighted_lm_loss": 3.415252447128296, + "objective/train/weights_max": 1.033376693725586, + "objective/train/weights_min": 0.9126763939857483, + "theoretical_loss": 3.6238760322031336, + "tokens_seen": 1076428800 + }, + { + "epoch": 0.33, + "learning_rate": 0.000680577849117175, + "loss": 1.6458, + "theoretical_loss": 3.6237310879249813, + "tokens_seen": 1076887552 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006802568218298556, + "loss": 1.6638, + "theoretical_loss": 3.6234000832974282, + "tokens_seen": 1077936128 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0004476569883991033, + "objective/train/docs_used": 615029, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.807309627532959, + "objective/train/original_loss": 3.807309865951538, + "objective/train/theoretical_loss": 3.6233587366986946, + "objective/train/tokens_used": 1098527200, + "objective/train/value_avg": -0.016876220703125, + "objective/train/value_loss": 0.007011342793703079, + "objective/train/value_max": -0.0006361007690429688, + "objective/train/value_min": -0.94873046875, + "objective/train/value_reward_corr": 0.5551313182105245, + "objective/train/value_std": 0.040740966796875, + "objective/train/weight_avg": 0.9999896287918091, + "objective/train/weighted_lm_loss": 3.8071441650390625, + "objective/train/weights_max": 1.0591938495635986, + "objective/train/weights_min": 0.9068117141723633, + "theoretical_loss": 3.6233587366986946, + "tokens_seen": 1078067200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006799357945425361, + "loss": 1.6757, + "theoretical_loss": 3.623069490559339, + "tokens_seen": 1078984704 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0012180876219645143, + "objective/train/docs_used": 616160, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.213259696960449, + "objective/train/original_loss": 3.2132601737976074, + "objective/train/theoretical_loss": 3.6228424465058904, + "objective/train/tokens_used": 1100165600, + "objective/train/value_avg": -0.0181732177734375, + "objective/train/value_loss": 0.003807655069977045, + "objective/train/value_max": -0.0008864402770996094, + "objective/train/value_min": -0.970703125, + "objective/train/value_reward_corr": 0.4173822350364176, + "objective/train/value_std": 0.026153564453125, + "objective/train/weight_avg": 1.00014066696167, + "objective/train/weighted_lm_loss": 3.212758779525757, + "objective/train/weights_max": 1.0638066530227661, + "objective/train/weights_min": 0.9066489934921265, + "theoretical_loss": 3.6228424465058904, + "tokens_seen": 1079705600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006796147672552168, + "loss": 1.6049, + "theoretical_loss": 3.6227393087986393, + "tokens_seen": 1080033280 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006792937399678974, + "loss": 1.663, + "theoretical_loss": 3.622409537106158, + "tokens_seen": 1081081856 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.008917906321585178, + "objective/train/docs_used": 616786, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6790268421173096, + "objective/train/original_loss": 3.6790270805358887, + "objective/train/theoretical_loss": 3.622327158149928, + "objective/train/tokens_used": 1101804000, + "objective/train/value_avg": -0.0258026123046875, + "objective/train/value_loss": 0.0033222550991922617, + "objective/train/value_max": -0.0006165504455566406, + "objective/train/value_min": -0.9609375, + "objective/train/value_reward_corr": 0.5085405073106415, + "objective/train/value_std": 0.03759765625, + "objective/train/weight_avg": 1.0009081363677979, + "objective/train/weighted_lm_loss": 3.682302474975586, + "objective/train/weights_max": 1.0625356435775757, + "objective/train/weights_min": 0.9090993404388428, + "theoretical_loss": 3.622327158149928, + "tokens_seen": 1081344000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006789727126805778, + "loss": 1.6665, + "theoretical_loss": 3.622080174575613, + "tokens_seen": 1082130432 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.00460036238655448, + "objective/train/docs_used": 617451, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.30655574798584, + "objective/train/original_loss": 3.3065555095672607, + "objective/train/theoretical_loss": 3.6218128681732686, + "objective/train/tokens_used": 1103442400, + "objective/train/value_avg": -0.0233001708984375, + "objective/train/value_loss": 0.003436173778027296, + "objective/train/value_max": -0.0005636215209960938, + "objective/train/value_min": -0.6337890625, + "objective/train/value_reward_corr": 0.3666792960491932, + "objective/train/value_std": 0.0261383056640625, + "objective/train/weight_avg": 1.0004769563674927, + "objective/train/weighted_lm_loss": 3.3082540035247803, + "objective/train/weights_max": 1.040029764175415, + "objective/train/weights_min": 0.9131376147270203, + "theoretical_loss": 3.6218128681732686, + "tokens_seen": 1082982400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006786516853932584, + "loss": 1.6373, + "theoretical_loss": 3.6217512203036026, + "tokens_seen": 1083179008 + }, + { + "epoch": 0.33, + "learning_rate": 0.000678330658105939, + "loss": 1.6644, + "theoretical_loss": 3.621422673389592, + "tokens_seen": 1084227584 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.010639947839081287, + "objective/train/docs_used": 618647, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.783329486846924, + "objective/train/original_loss": 3.783329486846924, + "objective/train/theoretical_loss": 3.621299573135513, + "objective/train/tokens_used": 1105080800, + "objective/train/value_avg": -0.0197296142578125, + "objective/train/value_loss": 0.0031274554785341024, + "objective/train/value_max": -0.0005192756652832031, + "objective/train/value_min": -0.8154296875, + "objective/train/value_reward_corr": 0.23487725148390265, + "objective/train/value_std": 0.025604248046875, + "objective/train/weight_avg": 1.0010794401168823, + "objective/train/weighted_lm_loss": 3.7868711948394775, + "objective/train/weights_max": 1.0348474979400635, + "objective/train/weights_min": 0.9070941805839539, + "theoretical_loss": 3.621299573135513, + "tokens_seen": 1084620800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006780096308186196, + "loss": 1.6156, + "theoretical_loss": 3.6210945329358992, + "tokens_seen": 1085276160 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.003237740136682987, + "objective/train/docs_used": 619458, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.260509729385376, + "objective/train/original_loss": 3.260509490966797, + "objective/train/theoretical_loss": 3.620787269613291, + "objective/train/tokens_used": 1106719200, + "objective/train/value_avg": -0.02032470703125, + "objective/train/value_loss": 0.007066826336085796, + "objective/train/value_max": -0.0005974769592285156, + "objective/train/value_min": -0.8408203125, + "objective/train/value_reward_corr": 0.3937684400883113, + "objective/train/value_std": 0.037109375, + "objective/train/weight_avg": 1.0003584623336792, + "objective/train/weighted_lm_loss": 3.260756731033325, + "objective/train/weights_max": 1.0501593351364136, + "objective/train/weights_min": 0.9073071479797363, + "theoretical_loss": 3.620787269613291, + "tokens_seen": 1086259200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006776886035313001, + "loss": 1.6237, + "theoretical_loss": 3.6207667980476868, + "tokens_seen": 1086324736 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006773675762439807, + "loss": 1.6407, + "theoretical_loss": 3.620439467832949, + "tokens_seen": 1087373312 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.01287365797907114, + "objective/train/docs_used": 620460, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.412691116333008, + "objective/train/original_loss": 3.412691116333008, + "objective/train/theoretical_loss": 3.620275954200152, + "objective/train/tokens_used": 1108357600, + "objective/train/value_avg": -0.0212554931640625, + "objective/train/value_loss": 0.0018135557183995843, + "objective/train/value_max": -0.0007824897766113281, + "objective/train/value_min": -0.509765625, + "objective/train/value_reward_corr": 0.2676708563827241, + "objective/train/value_std": 0.0262298583984375, + "objective/train/weight_avg": 1.0012964010238647, + "objective/train/weighted_lm_loss": 3.4171295166015625, + "objective/train/weights_max": 1.0520721673965454, + "objective/train/weights_min": 0.9284533262252808, + "theoretical_loss": 3.620275954200152, + "tokens_seen": 1087897600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006770465489566613, + "loss": 1.6217, + "theoretical_loss": 3.6201125414024986, + "tokens_seen": 1088421888 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006767255216693418, + "loss": 1.6222, + "theoretical_loss": 3.619786017869957, + "tokens_seen": 1089470464 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0007879595505073667, + "objective/train/docs_used": 620976, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.126026153564453, + "objective/train/original_loss": 3.126025676727295, + "objective/train/theoretical_loss": 3.619765623506458, + "objective/train/tokens_used": 1109996000, + "objective/train/value_avg": -0.0192413330078125, + "objective/train/value_loss": 0.0022088068071752787, + "objective/train/value_max": -0.0006823539733886719, + "objective/train/value_min": -0.3828125, + "objective/train/value_reward_corr": 0.705688508285312, + "objective/train/value_std": 0.0283050537109375, + "objective/train/weight_avg": 1.0000897645950317, + "objective/train/weighted_lm_loss": 3.127189874649048, + "objective/train/weights_max": 1.0249391794204712, + "objective/train/weights_min": 0.9742932319641113, + "theoretical_loss": 3.619765623506458, + "tokens_seen": 1089536000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006764044943820224, + "loss": 1.6537, + "theoretical_loss": 3.619459896351742, + "tokens_seen": 1090519040 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0026013373862951994, + "objective/train/docs_used": 621707, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0979368686676025, + "objective/train/original_loss": 3.0979366302490234, + "objective/train/theoretical_loss": 3.6192562741592726, + "objective/train/tokens_used": 1111634400, + "objective/train/value_avg": -0.018310546875, + "objective/train/value_loss": 0.004976273514330387, + "objective/train/value_max": -0.0005173683166503906, + "objective/train/value_min": -0.396484375, + "objective/train/value_reward_corr": 0.3174145384975968, + "objective/train/value_std": 0.0189666748046875, + "objective/train/weight_avg": 1.0002846717834473, + "objective/train/weighted_lm_loss": 3.098435163497925, + "objective/train/weights_max": 1.0403562784194946, + "objective/train/weights_min": 0.9235708117485046, + "theoretical_loss": 3.6192562741592726, + "tokens_seen": 1091174400 + }, + { + "epoch": 0.33, + "learning_rate": 0.000676083467094703, + "loss": 1.6271, + "theoretical_loss": 3.6191341759670568, + "tokens_seen": 1091567616 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006757624398073837, + "loss": 1.6576, + "theoretical_loss": 3.618808855837877, + "tokens_seen": 1092616192 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.00776270916685462, + "objective/train/docs_used": 622698, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7022767066955566, + "objective/train/original_loss": 3.7022767066955566, + "objective/train/theoretical_loss": 3.6187479028022547, + "objective/train/tokens_used": 1113272800, + "objective/train/value_avg": -0.03021240234375, + "objective/train/value_loss": 0.006087567191570997, + "objective/train/value_max": -0.0006046295166015625, + "objective/train/value_min": -0.77734375, + "objective/train/value_reward_corr": 0.7771329050237255, + "objective/train/value_std": 0.063232421875, + "objective/train/weight_avg": 0.9992537498474121, + "objective/train/weighted_lm_loss": 3.6994082927703857, + "objective/train/weights_max": 1.0417643785476685, + "objective/train/weights_min": 0.9121315479278564, + "theoretical_loss": 3.6187479028022547, + "tokens_seen": 1092812800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006754414125200642, + "loss": 1.6538, + "theoretical_loss": 3.6184839350889417, + "tokens_seen": 1093664768 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.010990657843649387, + "objective/train/docs_used": 623416, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5622217655181885, + "objective/train/original_loss": 3.5622217655181885, + "objective/train/theoretical_loss": 3.6182405060955523, + "objective/train/tokens_used": 1114911200, + "objective/train/value_avg": -0.050079345703125, + "objective/train/value_loss": 0.01751307211816311, + "objective/train/value_max": -0.0007615089416503906, + "objective/train/value_min": -0.978515625, + "objective/train/value_reward_corr": 0.7840408039536995, + "objective/train/value_std": 0.102783203125, + "objective/train/weight_avg": 0.9989868998527527, + "objective/train/weighted_lm_loss": 3.5618953704833984, + "objective/train/weights_max": 1.0697234869003296, + "objective/train/weights_min": 0.9075410962104797, + "theoretical_loss": 3.6182405060955523, + "tokens_seen": 1094451200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006751203852327448, + "loss": 1.6139, + "theoretical_loss": 3.6181594128477395, + "tokens_seen": 1094713344 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006747993579454254, + "loss": 1.6242, + "theoretical_loss": 3.6178352882444997, + "tokens_seen": 1095761920 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.00013553508324548602, + "objective/train/docs_used": 624708, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4842369556427, + "objective/train/original_loss": 3.484236717224121, + "objective/train/theoretical_loss": 3.617734080715698, + "objective/train/tokens_used": 1116549600, + "objective/train/value_avg": -0.0216064453125, + "objective/train/value_loss": 0.0036173772532492876, + "objective/train/value_max": -0.0003669261932373047, + "objective/train/value_min": -0.446533203125, + "objective/train/value_reward_corr": 0.7476301287369997, + "objective/train/value_std": 0.045013427734375, + "objective/train/weight_avg": 1.0000042915344238, + "objective/train/weighted_lm_loss": 3.4835219383239746, + "objective/train/weights_max": 1.022473931312561, + "objective/train/weights_min": 0.9182971715927124, + "theoretical_loss": 3.617734080715698, + "tokens_seen": 1096089600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006744783306581059, + "loss": 1.6454, + "theoretical_loss": 3.6175115604121793, + "tokens_seen": 1096810496 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.004078730940818787, + "objective/train/docs_used": 625328, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.222970485687256, + "objective/train/original_loss": 3.2229702472686768, + "objective/train/theoretical_loss": 3.617228623355502, + "objective/train/tokens_used": 1118188000, + "objective/train/value_avg": -0.01470184326171875, + "objective/train/value_loss": 0.0012559014139696956, + "objective/train/value_max": -0.0006237030029296875, + "objective/train/value_min": -0.344970703125, + "objective/train/value_reward_corr": 0.2396216153136417, + "objective/train/value_std": 0.0165557861328125, + "objective/train/weight_avg": 1.0004140138626099, + "objective/train/weighted_lm_loss": 3.2248952388763428, + "objective/train/weights_max": 1.02712881565094, + "objective/train/weights_min": 0.9235528707504272, + "theoretical_loss": 3.617228623355502, + "tokens_seen": 1097728000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006741573033707865, + "loss": 1.6345, + "theoretical_loss": 3.6171882284864525, + "tokens_seen": 1097859072 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006738362760834671, + "loss": 1.637, + "theoretical_loss": 3.6168652916056994, + "tokens_seen": 1098907648 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0013358796713873744, + "objective/train/docs_used": 626712, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8937013149261475, + "objective/train/original_loss": 2.8937008380889893, + "objective/train/theoretical_loss": 3.616724130723951, + "objective/train/tokens_used": 1119826400, + "objective/train/value_avg": -0.0185394287109375, + "objective/train/value_loss": 0.00902184285223484, + "objective/train/value_max": -0.0004954338073730469, + "objective/train/value_min": -0.890625, + "objective/train/value_reward_corr": 0.5007480658884216, + "objective/train/value_std": 0.0418701171875, + "objective/train/weight_avg": 0.9999105334281921, + "objective/train/weighted_lm_loss": 2.8924524784088135, + "objective/train/weights_max": 1.0622427463531494, + "objective/train/weights_min": 0.9068962335586548, + "theoretical_loss": 3.616724130723951, + "tokens_seen": 1099366400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006735152487961477, + "loss": 1.6293, + "theoretical_loss": 3.6165427489109963, + "tokens_seen": 1099956224 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.005404728930443525, + "objective/train/docs_used": 627263, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1147913932800293, + "objective/train/original_loss": 3.1147913932800293, + "objective/train/theoretical_loss": 3.616220599546101, + "objective/train/tokens_used": 1121464800, + "objective/train/value_avg": -0.01551055908203125, + "objective/train/value_loss": 0.002189942169934511, + "objective/train/value_max": -0.00064849853515625, + "objective/train/value_min": -0.72021484375, + "objective/train/value_reward_corr": 0.33348003382165275, + "objective/train/value_std": 0.0222625732421875, + "objective/train/weight_avg": 1.0005512237548828, + "objective/train/weighted_lm_loss": 3.1166255474090576, + "objective/train/weights_max": 1.0307048559188843, + "objective/train/weights_min": 0.9205353260040283, + "theoretical_loss": 3.616220599546101, + "tokens_seen": 1101004800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006731942215088282, + "loss": 1.623, + "theoretical_loss": 3.616220599546101, + "tokens_seen": 1101004800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006728731942215088, + "loss": 1.6347, + "theoretical_loss": 3.615898842657448, + "tokens_seen": 1102053376 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.005592333152890205, + "objective/train/docs_used": 628470, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.397029399871826, + "objective/train/original_loss": 3.3970296382904053, + "objective/train/theoretical_loss": 3.6157180265629814, + "objective/train/tokens_used": 1123103200, + "objective/train/value_avg": -0.01358795166015625, + "objective/train/value_loss": 0.0021351163741201162, + "objective/train/value_max": -0.0006237030029296875, + "objective/train/value_min": -0.470947265625, + "objective/train/value_reward_corr": 0.41590998900842896, + "objective/train/value_std": 0.019927978515625, + "objective/train/weight_avg": 1.0005698204040527, + "objective/train/weighted_lm_loss": 3.3987479209899902, + "objective/train/weights_max": 1.0438586473464966, + "objective/train/weights_min": 0.9135516285896301, + "theoretical_loss": 3.6157180265629814, + "tokens_seen": 1102643200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006725521669341894, + "loss": 1.5801, + "theoretical_loss": 3.6155774773941305, + "tokens_seen": 1103101952 + }, + { + "epoch": 0.33, + "learning_rate": 0.00067223113964687, + "loss": 1.6392, + "theoretical_loss": 3.615256502907896, + "tokens_seen": 1104150528 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.010358575731515884, + "objective/train/docs_used": 629051, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.988149881362915, + "objective/train/original_loss": 2.988150119781494, + "objective/train/theoretical_loss": 3.6152164085314853, + "objective/train/tokens_used": 1124741600, + "objective/train/value_avg": -0.0261077880859375, + "objective/train/value_loss": 0.005815399810671806, + "objective/train/value_max": -0.0008935928344726562, + "objective/train/value_min": -0.95068359375, + "objective/train/value_reward_corr": 0.5694352431743958, + "objective/train/value_std": 0.05548095703125, + "objective/train/weight_avg": 1.0010645389556885, + "objective/train/weighted_lm_loss": 2.991337537765503, + "objective/train/weights_max": 1.0758916139602661, + "objective/train/weights_min": 0.9073789715766907, + "theoretical_loss": 3.6152164085314853, + "tokens_seen": 1104281600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006719101123595505, + "loss": 1.6452, + "theoretical_loss": 3.6149359183531296, + "tokens_seen": 1105199104 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.003692612051963806, + "objective/train/docs_used": 630354, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.546625852584839, + "objective/train/original_loss": 3.546626329421997, + "objective/train/theoretical_loss": 3.614715742224278, + "objective/train/tokens_used": 1126380000, + "objective/train/value_avg": -0.01251983642578125, + "objective/train/value_loss": 0.00294207944534719, + "objective/train/value_max": -0.00033283233642578125, + "objective/train/value_min": -0.189208984375, + "objective/train/value_reward_corr": 0.09727872351503716, + "objective/train/value_std": 0.01363372802734375, + "objective/train/weight_avg": 1.0003836154937744, + "objective/train/weighted_lm_loss": 3.5474443435668945, + "objective/train/weights_max": 1.017562747001648, + "objective/train/weights_min": 0.9096466898918152, + "theoretical_loss": 3.614715742224278, + "tokens_seen": 1105920000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006715890850722312, + "loss": 1.6334, + "theoretical_loss": 3.614615722886849, + "tokens_seen": 1106247680 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006712680577849118, + "loss": 1.6028, + "theoretical_loss": 3.614295915668691, + "tokens_seen": 1107296256 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0060876356437802315, + "objective/train/docs_used": 630918, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9983317852020264, + "objective/train/original_loss": 2.9983322620391846, + "objective/train/theoretical_loss": 3.6142160244296884, + "objective/train/tokens_used": 1128018400, + "objective/train/value_avg": -0.01751708984375, + "objective/train/value_loss": 0.0014659822918474674, + "objective/train/value_max": -0.0009002685546875, + "objective/train/value_min": -0.373779296875, + "objective/train/value_reward_corr": 0.2609782596717824, + "objective/train/value_std": 0.0185546875, + "objective/train/weight_avg": 1.0006160736083984, + "objective/train/weighted_lm_loss": 3.000129461288452, + "objective/train/weights_max": 1.0353434085845947, + "objective/train/weights_min": 0.9395684003829956, + "theoretical_loss": 3.6142160244296884, + "tokens_seen": 1107558400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006709470304975923, + "loss": 1.6299, + "theoretical_loss": 3.613976495860898, + "tokens_seen": 1108344832 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0004866425588261336, + "objective/train/docs_used": 632203, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9483718872070312, + "objective/train/original_loss": 2.9483718872070312, + "objective/train/theoretical_loss": 3.6137172519516163, + "objective/train/tokens_used": 1129656800, + "objective/train/value_avg": -0.0160369873046875, + "objective/train/value_loss": 0.006514386739581823, + "objective/train/value_max": -0.0004878044128417969, + "objective/train/value_min": -0.919921875, + "objective/train/value_reward_corr": 0.42663161765686164, + "objective/train/value_std": 0.032135009765625, + "objective/train/weight_avg": 0.9999832510948181, + "objective/train/weighted_lm_loss": 2.9478890895843506, + "objective/train/weights_max": 1.060662031173706, + "objective/train/weights_min": 0.9102292656898499, + "theoretical_loss": 3.6137172519516163, + "tokens_seen": 1109196800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006706260032102729, + "loss": 1.6124, + "theoretical_loss": 3.613657462628315, + "tokens_seen": 1109393408 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006703049759229535, + "loss": 1.6413, + "theoretical_loss": 3.613338815138371, + "tokens_seen": 1110441984 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.01315945852547884, + "objective/train/docs_used": 632721, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6938982009887695, + "objective/train/original_loss": 3.6938986778259277, + "objective/train/theoretical_loss": 3.6132194216094313, + "objective/train/tokens_used": 1131295200, + "objective/train/value_avg": -0.036651611328125, + "objective/train/value_loss": 0.005500032100826502, + "objective/train/value_max": -0.0005974769592285156, + "objective/train/value_min": -0.91162109375, + "objective/train/value_reward_corr": 0.5349737010453041, + "objective/train/value_std": 0.061126708984375, + "objective/train/weight_avg": 1.0013432502746582, + "objective/train/weighted_lm_loss": 3.6992340087890625, + "objective/train/weights_max": 1.0508477687835693, + "objective/train/weights_min": 0.9100321531295776, + "theoretical_loss": 3.6132194216094313, + "tokens_seen": 1110835200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006699839486356341, + "loss": 1.604, + "theoretical_loss": 3.613020552561074, + "tokens_seen": 1111490560 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.006910497322678566, + "objective/train/docs_used": 634025, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.913102149963379, + "objective/train/original_loss": 2.9131016731262207, + "objective/train/theoretical_loss": 3.612722530237875, + "objective/train/tokens_used": 1132933600, + "objective/train/value_avg": -0.0147247314453125, + "objective/train/value_loss": 0.0014333192957565188, + "objective/train/value_max": -0.0005860328674316406, + "objective/train/value_min": -0.2181396484375, + "objective/train/value_reward_corr": 0.28665818060888637, + "objective/train/value_std": 0.01512908935546875, + "objective/train/weight_avg": 1.000698208808899, + "objective/train/weighted_lm_loss": 2.914504051208496, + "objective/train/weights_max": 1.0186700820922852, + "objective/train/weights_min": 0.9446974396705627, + "theoretical_loss": 3.612722530237875, + "tokens_seen": 1112473600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006696629213483146, + "loss": 1.5964, + "theoretical_loss": 3.6127026740689967, + "tokens_seen": 1112539136 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006693418940609952, + "loss": 1.6678, + "theoretical_loss": 3.612385178837271, + "tokens_seen": 1113587712 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.004959174897521734, + "objective/train/docs_used": 634689, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1363065242767334, + "objective/train/original_loss": 3.1363065242767334, + "objective/train/theoretical_loss": 3.6122265746869653, + "objective/train/tokens_used": 1134572000, + "objective/train/value_avg": -0.0205535888671875, + "objective/train/value_loss": 0.004312584176659584, + "objective/train/value_max": -0.0004839897155761719, + "objective/train/value_min": -0.58447265625, + "objective/train/value_reward_corr": 0.5715411949456454, + "objective/train/value_std": 0.03607177734375, + "objective/train/weight_avg": 1.0005171298980713, + "objective/train/weighted_lm_loss": 3.138270616531372, + "objective/train/weights_max": 1.0448665618896484, + "objective/train/weights_min": 0.9086219668388367, + "theoretical_loss": 3.6122265746869653, + "tokens_seen": 1114112000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006690208667736758, + "loss": 1.6188, + "theoretical_loss": 3.6120680660435736, + "tokens_seen": 1114636288 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006686998394863563, + "loss": 1.6215, + "theoretical_loss": 3.6117513348681163, + "tokens_seen": 1115684864 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.005231012124568224, + "objective/train/docs_used": 636068, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9556798934936523, + "objective/train/original_loss": 2.9556801319122314, + "objective/train/theoretical_loss": 3.611731551821899, + "objective/train/tokens_used": 1136210400, + "objective/train/value_avg": -0.01751708984375, + "objective/train/value_loss": 0.004457234404981136, + "objective/train/value_max": -0.0003936290740966797, + "objective/train/value_min": -0.85107421875, + "objective/train/value_reward_corr": 0.4600300555625934, + "objective/train/value_std": 0.038482666015625, + "objective/train/weight_avg": 1.0005449056625366, + "objective/train/weighted_lm_loss": 2.9567019939422607, + "objective/train/weights_max": 1.0496535301208496, + "objective/train/weights_min": 0.9101418852806091, + "theoretical_loss": 3.611731551821899, + "tokens_seen": 1115750400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006683788121990369, + "loss": 1.6431, + "theoretical_loss": 3.611434984493637, + "tokens_seen": 1116733440 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.007773727644234896, + "objective/train/docs_used": 636687, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.56254506111145, + "objective/train/original_loss": 2.56254506111145, + "objective/train/theoretical_loss": 3.6112374585229583, + "objective/train/tokens_used": 1137848800, + "objective/train/value_avg": -0.0125732421875, + "objective/train/value_loss": 0.00023270621022675186, + "objective/train/value_max": -0.00018966197967529297, + "objective/train/value_min": -0.13330078125, + "objective/train/value_reward_corr": 0.1992785590389791, + "objective/train/value_std": 0.01030731201171875, + "objective/train/weight_avg": 1.0007785558700562, + "objective/train/weighted_lm_loss": 2.5644493103027344, + "objective/train/weights_max": 1.0131393671035767, + "objective/train/weights_min": 0.9855517745018005, + "theoretical_loss": 3.6112374585229583, + "tokens_seen": 1117388800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006680577849117175, + "loss": 1.619, + "theoretical_loss": 3.6111190141053893, + "tokens_seen": 1117782016 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006677367576243982, + "loss": 1.6025, + "theoretical_loss": 3.6108034228911334, + "tokens_seen": 1118830592 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.006081356666982174, + "objective/train/docs_used": 638066, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0889194011688232, + "objective/train/original_loss": 3.0889196395874023, + "objective/train/theoretical_loss": 3.6107442916854158, + "objective/train/tokens_used": 1139487200, + "objective/train/value_avg": -0.0264739990234375, + "objective/train/value_loss": 0.00692475913092494, + "objective/train/value_max": -0.0005054473876953125, + "objective/train/value_min": -0.92626953125, + "objective/train/value_reward_corr": 0.45226877904713675, + "objective/train/value_std": 0.051910400390625, + "objective/train/weight_avg": 1.0006422996520996, + "objective/train/weighted_lm_loss": 3.0892322063446045, + "objective/train/weights_max": 1.0695021152496338, + "objective/train/weights_min": 0.9157975912094116, + "theoretical_loss": 3.6107442916854158, + "tokens_seen": 1119027200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006674157303370787, + "loss": 1.6077, + "theoretical_loss": 3.6104882100411215, + "tokens_seen": 1119879168 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.00297396257519722, + "objective/train/docs_used": 638756, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8720688819885254, + "objective/train/original_loss": 2.8720688819885254, + "objective/train/theoretical_loss": 3.6102520482194387, + "objective/train/tokens_used": 1141125600, + "objective/train/value_avg": -0.02099609375, + "objective/train/value_loss": 0.006460551638156176, + "objective/train/value_max": -0.0008039474487304688, + "objective/train/value_min": -0.955078125, + "objective/train/value_reward_corr": 0.5155094893978582, + "objective/train/value_std": 0.04766845703125, + "objective/train/weight_avg": 1.0003291368484497, + "objective/train/weighted_lm_loss": 2.872591733932495, + "objective/train/weights_max": 1.0830143690109253, + "objective/train/weights_min": 0.9095665216445923, + "theoretical_loss": 3.6102520482194387, + "tokens_seen": 1120665600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006670947030497593, + "loss": 1.6057, + "theoretical_loss": 3.6101733747480957, + "tokens_seen": 1120927744 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006667736757624399, + "loss": 1.5903, + "theoretical_loss": 3.609858916207269, + "tokens_seen": 1121976320 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.005533644929528236, + "objective/train/docs_used": 639983, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.571763515472412, + "objective/train/original_loss": 3.571763515472412, + "objective/train/theoretical_loss": 3.6097607250499975, + "objective/train/tokens_used": 1142764000, + "objective/train/value_avg": -0.01495361328125, + "objective/train/value_loss": 0.0022537128534168005, + "objective/train/value_max": -0.0006799697875976562, + "objective/train/value_min": -0.71630859375, + "objective/train/value_reward_corr": 0.42171199194777087, + "objective/train/value_std": 0.0232391357421875, + "objective/train/weight_avg": 1.000564455986023, + "objective/train/weighted_lm_loss": 3.5737593173980713, + "objective/train/weights_max": 1.0361096858978271, + "objective/train/weights_min": 0.9125643968582153, + "theoretical_loss": 3.6097607250499975, + "tokens_seen": 1122304000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006664526484751204, + "loss": 1.5938, + "theoretical_loss": 3.609544833616324, + "tokens_seen": 1123024896 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0036188948433846235, + "objective/train/docs_used": 640598, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6194331645965576, + "objective/train/original_loss": 3.6194331645965576, + "objective/train/theoretical_loss": 3.6092703191167743, + "objective/train/tokens_used": 1144402400, + "objective/train/value_avg": -0.0223236083984375, + "objective/train/value_loss": 0.006066563539206982, + "objective/train/value_max": -0.00046181678771972656, + "objective/train/value_min": -0.8564453125, + "objective/train/value_reward_corr": 0.4508170215415434, + "objective/train/value_std": 0.034912109375, + "objective/train/weight_avg": 1.0003917217254639, + "objective/train/weighted_lm_loss": 3.620123863220215, + "objective/train/weights_max": 1.03890860080719, + "objective/train/weights_min": 0.9111797213554382, + "theoretical_loss": 3.6092703191167743, + "tokens_seen": 1123942400 + }, + { + "epoch": 0.34, + "learning_rate": 0.000666131621187801, + "loss": 1.5985, + "theoretical_loss": 3.6092311261753958, + "tokens_seen": 1124073472 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006658105939004816, + "loss": 1.6022, + "theoretical_loss": 3.608917793087066, + "tokens_seen": 1125122048 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.005748809780925512, + "objective/train/docs_used": 641290, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9745609760284424, + "objective/train/original_loss": 2.974560499191284, + "objective/train/theoretical_loss": 3.6087808273740674, + "objective/train/tokens_used": 1146040800, + "objective/train/value_avg": -0.0142669677734375, + "objective/train/value_loss": 0.0014996971003711224, + "objective/train/value_max": -0.0007181167602539062, + "objective/train/value_min": -0.42333984375, + "objective/train/value_reward_corr": 0.28045443138578563, + "objective/train/value_std": 0.01406097412109375, + "objective/train/weight_avg": 1.0005823373794556, + "objective/train/weighted_lm_loss": 2.9759018421173096, + "objective/train/weights_max": 1.0187280178070068, + "objective/train/weights_min": 0.9203839898109436, + "theoretical_loss": 3.6087808273740674, + "tokens_seen": 1125580800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006654895666131622, + "loss": 1.554, + "theoretical_loss": 3.608604833556355, + "tokens_seen": 1126170624 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0011326707899570465, + "objective/train/docs_used": 642585, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9496779441833496, + "objective/train/original_loss": 2.9496781826019287, + "objective/train/theoretical_loss": 3.6082922467907066, + "objective/train/tokens_used": 1147679200, + "objective/train/value_avg": -0.0133056640625, + "objective/train/value_loss": 0.00344732659868896, + "objective/train/value_max": -0.0005884170532226562, + "objective/train/value_min": -0.291748046875, + "objective/train/value_reward_corr": 0.17585993883114973, + "objective/train/value_std": 0.0112457275390625, + "objective/train/weight_avg": 0.999903678894043, + "objective/train/weighted_lm_loss": 2.9504337310791016, + "objective/train/weights_max": 1.021902084350586, + "objective/train/weights_min": 0.9191075563430786, + "theoretical_loss": 3.6082922467907066, + "tokens_seen": 1127219200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006651685393258427, + "loss": 1.5929, + "theoretical_loss": 3.6082922467907066, + "tokens_seen": 1127219200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006648475120385233, + "loss": 1.5737, + "theoretical_loss": 3.6079800319999817, + "tokens_seen": 1128267776 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0010718146804720163, + "objective/train/docs_used": 643131, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.147367000579834, + "objective/train/original_loss": 3.147367000579834, + "objective/train/theoretical_loss": 3.607804574349956, + "objective/train/tokens_used": 1149317600, + "objective/train/value_avg": -0.0273284912109375, + "objective/train/value_loss": 0.007121752947568893, + "objective/train/value_max": -0.0004673004150390625, + "objective/train/value_min": -0.59521484375, + "objective/train/value_reward_corr": 0.21569188901561656, + "objective/train/value_std": 0.03826904296875, + "objective/train/weight_avg": 0.9999281167984009, + "objective/train/weighted_lm_loss": 3.1485142707824707, + "objective/train/weights_max": 1.0545259714126587, + "objective/train/weights_min": 0.9212514758110046, + "theoretical_loss": 3.607804574349956, + "tokens_seen": 1128857600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006645264847512039, + "loss": 1.5859, + "theoretical_loss": 3.60766818839645, + "tokens_seen": 1129316352 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006642054574638846, + "loss": 1.6183, + "theoretical_loss": 3.6073567151947774, + "tokens_seen": 1130364928 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.00874821562319994, + "objective/train/docs_used": 644333, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.114718437194824, + "objective/train/original_loss": 3.114718198776245, + "objective/train/theoretical_loss": 3.6073178070494287, + "objective/train/tokens_used": 1150956000, + "objective/train/value_avg": -0.021087646484375, + "objective/train/value_loss": 0.004532423336058855, + "objective/train/value_max": -0.0009365081787109375, + "objective/train/value_min": -0.59619140625, + "objective/train/value_reward_corr": 0.3235182264359353, + "objective/train/value_std": 0.031280517578125, + "objective/train/weight_avg": 1.0008971691131592, + "objective/train/weighted_lm_loss": 3.1168713569641113, + "objective/train/weights_max": 1.0576077699661255, + "objective/train/weights_min": 0.9093126058578491, + "theoretical_loss": 3.6073178070494287, + "tokens_seen": 1130496000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006638844301765649, + "loss": 1.6019, + "theoretical_loss": 3.607045611612018, + "tokens_seen": 1131413504 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.02195058763027191, + "objective/train/docs_used": 644979, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.047297477722168, + "objective/train/original_loss": 3.047297477722168, + "objective/train/theoretical_loss": 3.6068319419009978, + "objective/train/tokens_used": 1152594400, + "objective/train/value_avg": -0.055572509765625, + "objective/train/value_loss": 0.017485905438661575, + "objective/train/value_max": -0.0005235671997070312, + "objective/train/value_min": -0.9013671875, + "objective/train/value_reward_corr": 0.8977511048036667, + "objective/train/value_std": 0.1309814453125, + "objective/train/weight_avg": 0.9978910088539124, + "objective/train/weighted_lm_loss": 3.044795036315918, + "objective/train/weights_max": 1.0688581466674805, + "objective/train/weights_min": 0.9102870225906372, + "theoretical_loss": 3.6068319419009978, + "tokens_seen": 1132134400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006635634028892456, + "loss": 1.581, + "theoretical_loss": 3.6067348768676064, + "tokens_seen": 1132462080 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006632423756019262, + "loss": 1.5843, + "theoretical_loss": 3.606424510183343, + "tokens_seen": 1133510656 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.007174018770456314, + "objective/train/docs_used": 646384, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.3680219650268555, + "objective/train/original_loss": 2.3680222034454346, + "objective/train/theoretical_loss": 3.6063469759307054, + "objective/train/tokens_used": 1154232800, + "objective/train/value_avg": -0.0170745849609375, + "objective/train/value_loss": 0.003251563059166074, + "objective/train/value_max": -0.0006289482116699219, + "objective/train/value_min": -0.86328125, + "objective/train/value_reward_corr": 0.524841753173039, + "objective/train/value_std": 0.0285186767578125, + "objective/train/weight_avg": 1.0007333755493164, + "objective/train/weighted_lm_loss": 2.3691110610961914, + "objective/train/weights_max": 1.0284475088119507, + "objective/train/weights_min": 0.913192629814148, + "theoretical_loss": 3.6063469759307054, + "tokens_seen": 1133772800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006629213483146067, + "loss": 1.5836, + "theoretical_loss": 3.606114510783391, + "tokens_seen": 1134559232 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0026171510107815266, + "objective/train/docs_used": 647116, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.534496307373047, + "objective/train/original_loss": 3.534496545791626, + "objective/train/theoretical_loss": 3.6058629061786784, + "objective/train/tokens_used": 1155871200, + "objective/train/value_avg": -0.01629638671875, + "objective/train/value_loss": 0.005543517880141735, + "objective/train/value_max": -0.0005192756652832031, + "objective/train/value_min": -0.5419921875, + "objective/train/value_reward_corr": 0.3625522920252277, + "objective/train/value_std": 0.0212860107421875, + "objective/train/weight_avg": 1.0002888441085815, + "objective/train/weighted_lm_loss": 3.5351758003234863, + "objective/train/weights_max": 1.0346341133117676, + "objective/train/weights_min": 0.9192997813224792, + "theoretical_loss": 3.6058629061786784, + "tokens_seen": 1135411200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006626003210272873, + "loss": 1.5919, + "theoretical_loss": 3.605804877894263, + "tokens_seen": 1135607808 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006622792937399679, + "loss": 1.6002, + "theoretical_loss": 3.6054956107448124, + "tokens_seen": 1136656384 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.009675001725554466, + "objective/train/docs_used": 648555, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.504939079284668, + "objective/train/original_loss": 2.504939317703247, + "objective/train/theoretical_loss": 3.605379729699039, + "objective/train/tokens_used": 1157509600, + "objective/train/value_avg": -0.0189361572265625, + "objective/train/value_loss": 0.002381972037255764, + "objective/train/value_max": -0.0010166168212890625, + "objective/train/value_min": -0.9189453125, + "objective/train/value_reward_corr": 0.5794724002372461, + "objective/train/value_std": 0.029327392578125, + "objective/train/weight_avg": 1.0009791851043701, + "objective/train/weighted_lm_loss": 2.506938934326172, + "objective/train/weights_max": 1.0403354167938232, + "objective/train/weights_min": 0.918678343296051, + "theoretical_loss": 3.605379729699039, + "tokens_seen": 1137049600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006619582664526485, + "loss": 1.6115, + "theoretical_loss": 3.605186708566225, + "tokens_seen": 1137704960 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.00913952849805355, + "objective/train/docs_used": 649005, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.115351915359497, + "objective/train/original_loss": 3.115351676940918, + "objective/train/theoretical_loss": 3.6048974435598193, + "objective/train/tokens_used": 1159148000, + "objective/train/value_avg": -0.042694091796875, + "objective/train/value_loss": 0.0024378325324505568, + "objective/train/value_max": -0.0008935928344726562, + "objective/train/value_min": -0.892578125, + "objective/train/value_reward_corr": 0.8113855324315864, + "objective/train/value_std": 0.082763671875, + "objective/train/weight_avg": 1.0009262561798096, + "objective/train/weighted_lm_loss": 3.119546413421631, + "objective/train/weights_max": 1.067919135093689, + "objective/train/weights_min": 0.9681122303009033, + "theoretical_loss": 3.6048974435598193, + "tokens_seen": 1138688000 + }, + { + "epoch": 0.35, + "learning_rate": 0.000661637239165329, + "loss": 1.599, + "theoretical_loss": 3.6048781705920105, + "tokens_seen": 1138753536 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006613162118780096, + "loss": 1.6026, + "theoretical_loss": 3.6045699960579896, + "tokens_seen": 1139802112 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": -0.0002743069198913872, + "objective/train/docs_used": 649993, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3822009563446045, + "objective/train/original_loss": 3.3822004795074463, + "objective/train/theoretical_loss": 3.6044160448428775, + "objective/train/tokens_used": 1160786400, + "objective/train/value_avg": -0.0209503173828125, + "objective/train/value_loss": 0.007360289804637432, + "objective/train/value_max": -0.00080108642578125, + "objective/train/value_min": -0.97509765625, + "objective/train/value_reward_corr": 0.5595372623326569, + "objective/train/value_std": 0.05377197265625, + "objective/train/weight_avg": 1.0000087022781372, + "objective/train/weighted_lm_loss": 3.382650136947632, + "objective/train/weights_max": 1.07321298122406, + "objective/train/weights_min": 0.9063414931297302, + "theoretical_loss": 3.6044160448428775, + "tokens_seen": 1140326400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006609951845906902, + "loss": 1.5792, + "theoretical_loss": 3.60426218420229, + "tokens_seen": 1140850688 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006606741573033707, + "loss": 1.5777, + "theoretical_loss": 3.603954734265334, + "tokens_seen": 1141899264 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.007861757650971413, + "objective/train/docs_used": 650682, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6099960803985596, + "objective/train/original_loss": 3.6099960803985596, + "objective/train/theoretical_loss": 3.603935530643809, + "objective/train/tokens_used": 1162424800, + "objective/train/value_avg": -0.0172271728515625, + "objective/train/value_loss": 0.0031552473083138466, + "objective/train/value_max": -0.0005679130554199219, + "objective/train/value_min": -0.94677734375, + "objective/train/value_reward_corr": 0.3770982355077112, + "objective/train/value_std": 0.036407470703125, + "objective/train/weight_avg": 1.0008018016815186, + "objective/train/weighted_lm_loss": 3.6123898029327393, + "objective/train/weights_max": 1.079201102256775, + "objective/train/weights_min": 0.9098518490791321, + "theoretical_loss": 3.603935530643809, + "tokens_seen": 1141964800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006603531300160513, + "loss": 1.5542, + "theoretical_loss": 3.60364764548983, + "tokens_seen": 1142947840 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": -0.004804254975169897, + "objective/train/docs_used": 652177, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.642241954803467, + "objective/train/original_loss": 2.6422417163848877, + "objective/train/theoretical_loss": 3.603455898071866, + "objective/train/tokens_used": 1164063200, + "objective/train/value_avg": -0.024261474609375, + "objective/train/value_loss": 0.00978259276598692, + "objective/train/value_max": -0.0007948875427246094, + "objective/train/value_min": -0.82958984375, + "objective/train/value_reward_corr": 0.6331190265025862, + "objective/train/value_std": 0.051177978515625, + "objective/train/weight_avg": 0.9995676875114441, + "objective/train/weighted_lm_loss": 2.6404502391815186, + "objective/train/weights_max": 1.0403748750686646, + "objective/train/weights_min": 0.9149937629699707, + "theoretical_loss": 3.603455898071866, + "tokens_seen": 1143603200 + }, + { + "epoch": 0.35, + "learning_rate": 0.000660032102728732, + "loss": 1.5858, + "theoretical_loss": 3.6033409171207644, + "tokens_seen": 1143996416 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006597110754414126, + "loss": 1.5747, + "theoretical_loss": 3.6030345484053923, + "tokens_seen": 1145044992 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.000510512269102037, + "objective/train/docs_used": 652765, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.149134874343872, + "objective/train/original_loss": 3.149134874343872, + "objective/train/theoretical_loss": 3.6029771442498717, + "objective/train/tokens_used": 1165701600, + "objective/train/value_avg": -0.0178985595703125, + "objective/train/value_loss": 0.0067058890126645565, + "objective/train/value_max": -0.0005745887756347656, + "objective/train/value_min": -0.9189453125, + "objective/train/value_reward_corr": 0.4364763379151599, + "objective/train/value_std": 0.03302001953125, + "objective/train/weight_avg": 1.0000839233398438, + "objective/train/weighted_lm_loss": 3.1491570472717285, + "objective/train/weights_max": 1.0392102003097534, + "objective/train/weights_min": 0.9063280820846558, + "theoretical_loss": 3.6029771442498717, + "tokens_seen": 1145241600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006593900481540931, + "loss": 1.577, + "theoretical_loss": 3.602728538593227, + "tokens_seen": 1146093568 + }, + { + "debugging/Self-BLEU-5": 0.293406318468253, + "debugging/distinct-1-grams": 0.8098961360359294, + "debugging/distinct-2-grams": 0.9639839252742478, + "debugging/entropy-1-grams": 5.203556100794769, + "debugging/entropy-2-grams": 5.919939470517111, + "debugging/length": 412.8333333333333, + "debugging/num_segments": 6, + "debugging/raw_token_scores_avg": 0.002136901719495654, + "debugging/raw_token_scores_std": 0.007460694294422865, + "epoch": 0.35, + "objective/train/advantage_avg": 0.009626097977161407, + "objective/train/docs_used": 652965, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1145195960998535, + "objective/train/original_loss": 3.1145198345184326, + "objective/train/theoretical_loss": 3.6024992663141386, + "objective/train/tokens_used": 1167340000, + "objective/train/value_avg": -0.0117645263671875, + "objective/train/value_loss": 0.00023663257888983935, + "objective/train/value_max": -0.0005726814270019531, + "objective/train/value_min": -0.14453125, + "objective/train/value_reward_corr": 0.1835309905569115, + "objective/train/value_std": 0.0108642578125, + "objective/train/weight_avg": 1.000963807106018, + "objective/train/weighted_lm_loss": 3.117464780807495, + "objective/train/weights_max": 1.0144613981246948, + "objective/train/weights_min": 0.9877168536186218, + "theoretical_loss": 3.6024992663141386, + "tokens_seen": 1146880000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006590690208667737, + "loss": 1.5888, + "theoretical_loss": 3.6024228869360346, + "tokens_seen": 1147142144 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006587479935794543, + "loss": 1.6188, + "theoretical_loss": 3.602117592687822, + "tokens_seen": 1148190720 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.005320437252521515, + "objective/train/docs_used": 654364, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.888399839401245, + "objective/train/original_loss": 2.888400077819824, + "objective/train/theoretical_loss": 3.6020222614143846, + "objective/train/tokens_used": 1168978400, + "objective/train/value_avg": -0.0161285400390625, + "objective/train/value_loss": 0.0008391059236600995, + "objective/train/value_max": -0.0008831024169921875, + "objective/train/value_min": -0.2327880859375, + "objective/train/value_reward_corr": 0.43227964227149124, + "objective/train/value_std": 0.01611328125, + "objective/train/weight_avg": 1.0005362033843994, + "objective/train/weighted_lm_loss": 2.8908016681671143, + "objective/train/weights_max": 1.0225179195404053, + "objective/train/weights_min": 0.9683188796043396, + "theoretical_loss": 3.6020222614143846, + "tokens_seen": 1148518400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006584269662921348, + "loss": 1.5941, + "theoretical_loss": 3.6018126551048306, + "tokens_seen": 1149239296 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0015626937383785844, + "objective/train/docs_used": 654951, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1290318965911865, + "objective/train/original_loss": 3.1290318965911865, + "objective/train/theoretical_loss": 3.601546126713652, + "objective/train/tokens_used": 1170616800, + "objective/train/value_avg": -0.0180511474609375, + "objective/train/value_loss": 0.0030113249085843563, + "objective/train/value_max": -0.0003199577331542969, + "objective/train/value_min": -0.464599609375, + "objective/train/value_reward_corr": 0.5909245708952962, + "objective/train/value_std": 0.027679443359375, + "objective/train/weight_avg": 1.000171184539795, + "objective/train/weighted_lm_loss": 3.128798007965088, + "objective/train/weights_max": 1.0202604532241821, + "objective/train/weights_min": 0.9309244751930237, + "theoretical_loss": 3.601546126713652, + "tokens_seen": 1150156800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006581059390048154, + "loss": 1.6102, + "theoretical_loss": 3.6015080734455243, + "tokens_seen": 1150287872 + }, + { + "epoch": 0.35, + "learning_rate": 0.000657784911717496, + "loss": 1.5682, + "theoretical_loss": 3.601203846970585, + "tokens_seen": 1151336448 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": -0.0028265409637242556, + "objective/train/docs_used": 656079, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.025010824203491, + "objective/train/original_loss": 3.025010824203491, + "objective/train/theoretical_loss": 3.601070859388227, + "objective/train/tokens_used": 1172255200, + "objective/train/value_avg": -0.0198822021484375, + "objective/train/value_loss": 0.008504724130034447, + "objective/train/value_max": -0.0007181167602539062, + "objective/train/value_min": -0.51904296875, + "objective/train/value_reward_corr": 0.37046780786509276, + "objective/train/value_std": 0.0252685546875, + "objective/train/weight_avg": 0.9997591376304626, + "objective/train/weighted_lm_loss": 3.023517608642578, + "objective/train/weights_max": 1.0460680723190308, + "objective/train/weights_min": 0.9182621240615845, + "theoretical_loss": 3.601070859388227, + "tokens_seen": 1151795200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006574638844301766, + "loss": 1.5916, + "theoretical_loss": 3.6008999749429007, + "tokens_seen": 1152385024 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.001126136863604188, + "objective/train/docs_used": 656588, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1215920448303223, + "objective/train/original_loss": 3.1215922832489014, + "objective/train/theoretical_loss": 3.6005964566275575, + "objective/train/tokens_used": 1173893600, + "objective/train/value_avg": -0.0217742919921875, + "objective/train/value_loss": 0.008048195391893387, + "objective/train/value_max": -0.0005974769592285156, + "objective/train/value_min": -0.88671875, + "objective/train/value_reward_corr": 0.5402650911574467, + "objective/train/value_std": 0.045135498046875, + "objective/train/weight_avg": 1.0001521110534668, + "objective/train/weighted_lm_loss": 3.1214377880096436, + "objective/train/weights_max": 1.0827407836914062, + "objective/train/weights_min": 0.90770423412323, + "theoretical_loss": 3.6005964566275575, + "tokens_seen": 1153433600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006571428571428571, + "loss": 1.6135, + "theoretical_loss": 3.6005964566275575, + "tokens_seen": 1153433600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006568218298555377, + "loss": 1.6194, + "theoretical_loss": 3.600293291291833, + "tokens_seen": 1154482176 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": -0.0019421400502324104, + "objective/train/docs_used": 657761, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0445542335510254, + "objective/train/original_loss": 3.0445544719696045, + "objective/train/theoretical_loss": 3.600122915634175, + "objective/train/tokens_used": 1175532000, + "objective/train/value_avg": -0.04119873046875, + "objective/train/value_loss": 0.014283187687397003, + "objective/train/value_max": -0.0005359649658203125, + "objective/train/value_min": -0.97900390625, + "objective/train/value_reward_corr": 0.6671212787834261, + "objective/train/value_std": 0.10784912109375, + "objective/train/weight_avg": 0.9998759031295776, + "objective/train/weighted_lm_loss": 3.042656898498535, + "objective/train/weights_max": 1.0848654508590698, + "objective/train/weights_min": 0.9083343148231506, + "theoretical_loss": 3.600122915634175, + "tokens_seen": 1155072000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006565008025682183, + "loss": 1.5986, + "theoretical_loss": 3.5999904782051866, + "tokens_seen": 1155530752 + }, + { + "epoch": 0.35, + "learning_rate": 0.000656179775280899, + "loss": 1.5621, + "theoretical_loss": 3.5996880166392486, + "tokens_seen": 1156579328 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.006542462855577469, + "objective/train/docs_used": 658495, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6421091556549072, + "objective/train/original_loss": 3.6421093940734863, + "objective/train/theoretical_loss": 3.5996502336236142, + "objective/train/tokens_used": 1177170400, + "objective/train/value_avg": -0.0176849365234375, + "objective/train/value_loss": 0.0034781547728925943, + "objective/train/value_max": -0.00041413307189941406, + "objective/train/value_min": -0.82666015625, + "objective/train/value_reward_corr": 0.3960542878836679, + "objective/train/value_std": 0.03094482421875, + "objective/train/weight_avg": 1.00067138671875, + "objective/train/weighted_lm_loss": 3.6445388793945312, + "objective/train/weights_max": 1.0374739170074463, + "objective/train/weights_min": 0.9124050140380859, + "theoretical_loss": 3.5996502336236142, + "tokens_seen": 1156710400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006558587479935795, + "loss": 1.599, + "theoretical_loss": 3.599385905867816, + "tokens_seen": 1157627904 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": -0.023180367425084114, + "objective/train/docs_used": 659118, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2504312992095947, + "objective/train/original_loss": 3.250432014465332, + "objective/train/theoretical_loss": 3.5991784078243345, + "objective/train/tokens_used": 1178808800, + "objective/train/value_avg": -0.01507568359375, + "objective/train/value_loss": 0.011932369321584702, + "objective/train/value_max": -0.0007824897766113281, + "objective/train/value_min": -0.79296875, + "objective/train/value_reward_corr": 0.46571479368255486, + "objective/train/value_std": 0.022186279296875, + "objective/train/weight_avg": 0.9977407455444336, + "objective/train/weighted_lm_loss": 3.2445764541625977, + "objective/train/weights_max": 1.0466816425323486, + "objective/train/weights_min": 0.9067167043685913, + "theoretical_loss": 3.5991784078243345, + "tokens_seen": 1158348800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006555377207062601, + "loss": 1.5831, + "theoretical_loss": 3.5990841451668416, + "tokens_seen": 1158676480 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006552166934189407, + "loss": 1.572, + "theoretical_loss": 3.598782733814426, + "tokens_seen": 1159725056 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0003216532932128757, + "objective/train/docs_used": 660623, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.331434726715088, + "objective/train/original_loss": 3.331434488296509, + "objective/train/theoretical_loss": 3.5987074354776407, + "objective/train/tokens_used": 1180447200, + "objective/train/value_avg": -0.022369384765625, + "objective/train/value_loss": 0.006807297468185425, + "objective/train/value_max": -0.0006461143493652344, + "objective/train/value_min": -0.822265625, + "objective/train/value_reward_corr": 0.4944530784477598, + "objective/train/value_std": 0.03839111328125, + "objective/train/weight_avg": 1.0000656843185425, + "objective/train/weighted_lm_loss": 3.3319358825683594, + "objective/train/weights_max": 1.0702189207077026, + "objective/train/weights_min": 0.9101179242134094, + "theoretical_loss": 3.5987074354776407, + "tokens_seen": 1159987200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006548956661316212, + "loss": 1.6057, + "theoretical_loss": 3.598481671090809, + "tokens_seen": 1160773632 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0020557919051498175, + "objective/train/docs_used": 661405, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.391097068786621, + "objective/train/original_loss": 3.391096830368042, + "objective/train/theoretical_loss": 3.5982373138376076, + "objective/train/tokens_used": 1182085600, + "objective/train/value_avg": -0.0302581787109375, + "objective/train/value_loss": 0.0028628245927393436, + "objective/train/value_max": -0.0007181167602539062, + "objective/train/value_min": -0.4970703125, + "objective/train/value_reward_corr": 0.7550848456755501, + "objective/train/value_std": 0.066650390625, + "objective/train/weight_avg": 1.0002198219299316, + "objective/train/weighted_lm_loss": 3.3918702602386475, + "objective/train/weights_max": 1.0417922735214233, + "objective/train/weights_min": 0.955776035785675, + "theoretical_loss": 3.5982373138376076, + "tokens_seen": 1161625600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006545746388443018, + "loss": 1.5958, + "theoretical_loss": 3.5981809562783633, + "tokens_seen": 1161822208 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006542536115569824, + "loss": 1.5528, + "theoretical_loss": 3.5978805886615834, + "tokens_seen": 1162870784 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": -0.01482967846095562, + "objective/train/docs_used": 662532, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.346525192260742, + "objective/train/original_loss": 3.346524953842163, + "objective/train/theoretical_loss": 3.597768040171002, + "objective/train/tokens_used": 1183724000, + "objective/train/value_avg": -0.031341552734375, + "objective/train/value_loss": 0.012327845208346844, + "objective/train/value_max": -0.0007615089416503906, + "objective/train/value_min": -0.890625, + "objective/train/value_reward_corr": 0.6385090179951274, + "objective/train/value_std": 0.07550048828125, + "objective/train/weight_avg": 0.9985777139663696, + "objective/train/weighted_lm_loss": 3.3442368507385254, + "objective/train/weights_max": 1.0457737445831299, + "objective/train/weights_min": 0.9073807597160339, + "theoretical_loss": 3.597768040171002, + "tokens_seen": 1163264000 + }, + { + "epoch": 0.35, + "learning_rate": 0.000653932584269663, + "loss": 1.5807, + "theoretical_loss": 3.5975805675270784, + "tokens_seen": 1163919360 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": -0.007293686270713806, + "objective/train/docs_used": 663185, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8975205421447754, + "objective/train/original_loss": 2.8975203037261963, + "objective/train/theoretical_loss": 3.5972996117572054, + "objective/train/tokens_used": 1185362400, + "objective/train/value_avg": -0.010772705078125, + "objective/train/value_loss": 0.0023679565638303757, + "objective/train/value_max": -0.0004076957702636719, + "objective/train/value_min": -0.1470947265625, + "objective/train/value_reward_corr": 0.5436830626728251, + "objective/train/value_std": 0.0106048583984375, + "objective/train/weight_avg": 0.9992823600769043, + "objective/train/weighted_lm_loss": 2.897773504257202, + "objective/train/weights_max": 1.0147262811660767, + "objective/train/weights_min": 0.9744361639022827, + "theoretical_loss": 3.5972996117572054, + "tokens_seen": 1164902400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006536115569823435, + "loss": 1.5817, + "theoretical_loss": 3.5972808921635666, + "tokens_seen": 1164967936 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006532905296950241, + "loss": 1.5793, + "theoretical_loss": 3.5969815618618615, + "tokens_seen": 1166016512 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0031176586635410786, + "objective/train/docs_used": 664585, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9449338912963867, + "objective/train/original_loss": 2.9449336528778076, + "objective/train/theoretical_loss": 3.5968320258881388, + "objective/train/tokens_used": 1187000800, + "objective/train/value_avg": -0.01340484619140625, + "objective/train/value_loss": 0.0015347478911280632, + "objective/train/value_max": -0.0007791519165039062, + "objective/train/value_min": -0.3583984375, + "objective/train/value_reward_corr": 0.3382699050661009, + "objective/train/value_std": 0.017608642578125, + "objective/train/weight_avg": 1.000319480895996, + "objective/train/weighted_lm_loss": 2.9465839862823486, + "objective/train/weights_max": 1.0308887958526611, + "objective/train/weights_min": 0.9744604229927063, + "theoretical_loss": 3.5968320258881388, + "tokens_seen": 1166540800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006529695024077047, + "loss": 1.5802, + "theoretical_loss": 3.5966825759148704, + "tokens_seen": 1167065088 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006526484751203852, + "loss": 1.5834, + "theoretical_loss": 3.5963839336175814, + "tokens_seen": 1168113664 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.003727912437170744, + "objective/train/docs_used": 665171, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7734286785125732, + "objective/train/original_loss": 2.7734286785125732, + "objective/train/theoretical_loss": 3.5963652798681878, + "objective/train/tokens_used": 1188639200, + "objective/train/value_avg": -0.02874755859375, + "objective/train/value_loss": 0.002442124532535672, + "objective/train/value_max": -0.0007381439208984375, + "objective/train/value_min": -0.52001953125, + "objective/train/value_reward_corr": 0.8242300982057718, + "objective/train/value_std": 0.05706787109375, + "objective/train/weight_avg": 1.0003849267959595, + "objective/train/weighted_lm_loss": 2.7737300395965576, + "objective/train/weights_max": 1.0342519283294678, + "objective/train/weights_min": 0.9454085826873779, + "theoretical_loss": 3.5963652798681878, + "tokens_seen": 1168179200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006523274478330658, + "loss": 1.5726, + "theoretical_loss": 3.596085634267058, + "tokens_seen": 1169162240 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.00933599192649126, + "objective/train/docs_used": 666471, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.322331428527832, + "objective/train/original_loss": 3.322331428527832, + "objective/train/theoretical_loss": 3.595899371014127, + "objective/train/tokens_used": 1190277600, + "objective/train/value_avg": -0.0198974609375, + "objective/train/value_loss": 0.0032126244623214006, + "objective/train/value_max": -0.00046181678771972656, + "objective/train/value_min": -0.89306640625, + "objective/train/value_reward_corr": 0.5806652763993193, + "objective/train/value_std": 0.035186767578125, + "objective/train/weight_avg": 1.000949501991272, + "objective/train/weighted_lm_loss": 3.3250999450683594, + "objective/train/weights_max": 1.0458966493606567, + "objective/train/weights_min": 0.9135488867759705, + "theoretical_loss": 3.595899371014127, + "tokens_seen": 1169817600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006520064205457465, + "loss": 1.5865, + "theoretical_loss": 3.5957876771624298, + "tokens_seen": 1170210816 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006516853932584271, + "loss": 1.5774, + "theoretical_loss": 3.5954900616048855, + "tokens_seen": 1171259392 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": -0.003184134839102626, + "objective/train/docs_used": 666886, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.478727102279663, + "objective/train/original_loss": 3.478727102279663, + "objective/train/theoretical_loss": 3.595434296655048, + "objective/train/tokens_used": 1191916000, + "objective/train/value_avg": -0.021148681640625, + "objective/train/value_loss": 0.008286127820611, + "objective/train/value_max": -0.00044083595275878906, + "objective/train/value_min": -0.9033203125, + "objective/train/value_reward_corr": 0.6328650841947232, + "objective/train/value_std": 0.042755126953125, + "objective/train/weight_avg": 0.9997222423553467, + "objective/train/weighted_lm_loss": 3.477269411087036, + "objective/train/weights_max": 1.078615665435791, + "objective/train/weights_min": 0.9073711037635803, + "theoretical_loss": 3.595434296655048, + "tokens_seen": 1171456000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006513643659711076, + "loss": 1.5709, + "theoretical_loss": 3.5951927868976643, + "tokens_seen": 1172307968 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.013072420842945576, + "objective/train/docs_used": 667905, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2727670669555664, + "objective/train/original_loss": 3.2727670669555664, + "objective/train/theoretical_loss": 3.594970054132281, + "objective/train/tokens_used": 1193554400, + "objective/train/value_avg": -0.0307769775390625, + "objective/train/value_loss": 0.006666362751275301, + "objective/train/value_max": -0.0005908012390136719, + "objective/train/value_min": -0.95166015625, + "objective/train/value_reward_corr": 0.4013536943251714, + "objective/train/value_std": 0.05633544921875, + "objective/train/weight_avg": 1.0013402700424194, + "objective/train/weighted_lm_loss": 3.278088331222534, + "objective/train/weights_max": 1.06490957736969, + "objective/train/weights_min": 0.907616138458252, + "theoretical_loss": 3.594970054132281, + "tokens_seen": 1173094400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006510433386837882, + "loss": 1.6017, + "theoretical_loss": 3.5948958523460495, + "tokens_seen": 1173356544 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006507223113964688, + "loss": 1.601, + "theoretical_loss": 3.5945992572573577, + "tokens_seen": 1174405120 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.004960459657013416, + "objective/train/docs_used": 668590, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.791318655014038, + "objective/train/original_loss": 2.791318655014038, + "objective/train/theoretical_loss": 3.594506640799327, + "objective/train/tokens_used": 1195192800, + "objective/train/value_avg": -0.014373779296875, + "objective/train/value_loss": 0.001585001708008349, + "objective/train/value_max": -0.0010404586791992188, + "objective/train/value_min": -0.453857421875, + "objective/train/value_reward_corr": 0.22640126028659893, + "objective/train/value_std": 0.01514434814453125, + "objective/train/weight_avg": 1.0005038976669312, + "objective/train/weighted_lm_loss": 2.7930099964141846, + "objective/train/weights_max": 1.0374093055725098, + "objective/train/weights_min": 0.9311681389808655, + "theoretical_loss": 3.594506640799327, + "tokens_seen": 1174732800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006504012841091493, + "loss": 1.5788, + "theoretical_loss": 3.5943030009409345, + "tokens_seen": 1175453696 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": -0.01000335905700922, + "objective/train/docs_used": 669752, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.061678171157837, + "objective/train/original_loss": 3.061678171157837, + "objective/train/theoretical_loss": 3.594044054021782, + "objective/train/tokens_used": 1196831200, + "objective/train/value_avg": -0.0235748291015625, + "objective/train/value_loss": 0.007484646514058113, + "objective/train/value_max": -0.0005421638488769531, + "objective/train/value_min": -0.8251953125, + "objective/train/value_reward_corr": 0.553576896767388, + "objective/train/value_std": 0.03839111328125, + "objective/train/weight_avg": 0.9990365505218506, + "objective/train/weighted_lm_loss": 3.05922794342041, + "objective/train/weights_max": 1.033237099647522, + "objective/train/weights_min": 0.9143261909484863, + "theoretical_loss": 3.594044054021782, + "tokens_seen": 1176371200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006500802568218299, + "loss": 1.5812, + "theoretical_loss": 3.5940070827081443, + "tokens_seen": 1176502272 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006497592295345105, + "loss": 1.5851, + "theoretical_loss": 3.593711501872364, + "tokens_seen": 1177550848 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.007641020230948925, + "objective/train/docs_used": 670392, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.552468776702881, + "objective/train/original_loss": 3.5524682998657227, + "objective/train/theoretical_loss": 3.593582291177267, + "objective/train/tokens_used": 1198469600, + "objective/train/value_avg": -0.016510009765625, + "objective/train/value_loss": 0.0025883568450808525, + "objective/train/value_max": -0.0003726482391357422, + "objective/train/value_min": -0.97802734375, + "objective/train/value_reward_corr": 0.6286282878409195, + "objective/train/value_std": 0.03936767578125, + "objective/train/weight_avg": 1.000776767730713, + "objective/train/weighted_lm_loss": 3.5546000003814697, + "objective/train/weights_max": 1.0717437267303467, + "objective/train/weights_min": 0.9089593291282654, + "theoretical_loss": 3.593582291177267, + "tokens_seen": 1178009600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006494382022471911, + "loss": 1.573, + "theoretical_loss": 3.5934162577489746, + "tokens_seen": 1178599424 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": -0.004430354107171297, + "objective/train/docs_used": 671618, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3726248741149902, + "objective/train/original_loss": 3.3726251125335693, + "objective/train/theoretical_loss": 3.5931213496553536, + "objective/train/tokens_used": 1200108000, + "objective/train/value_avg": -0.0236663818359375, + "objective/train/value_loss": 0.003705088747665286, + "objective/train/value_max": -0.0005316734313964844, + "objective/train/value_min": -0.9423828125, + "objective/train/value_reward_corr": 0.6001190397538178, + "objective/train/value_std": 0.0355224609375, + "objective/train/weight_avg": 0.9995753169059753, + "objective/train/weighted_lm_loss": 3.3716630935668945, + "objective/train/weights_max": 1.0418933629989624, + "objective/train/weights_min": 0.910900890827179, + "theoretical_loss": 3.5931213496553536, + "tokens_seen": 1179648000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006491171749598715, + "loss": 1.5662, + "theoretical_loss": 3.5931213496553536, + "tokens_seen": 1179648000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006487961476725521, + "loss": 1.6035, + "theoretical_loss": 3.5928267769108677, + "tokens_seen": 1180696576 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0018116022692993283, + "objective/train/docs_used": 672229, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0781102180480957, + "objective/train/original_loss": 3.078110456466675, + "objective/train/theoretical_loss": 3.592661226857496, + "objective/train/tokens_used": 1201746400, + "objective/train/value_avg": -0.0251007080078125, + "objective/train/value_loss": 0.005497463047504425, + "objective/train/value_max": -0.0007066726684570312, + "objective/train/value_min": -0.90771484375, + "objective/train/value_reward_corr": 0.6492825923889279, + "objective/train/value_std": 0.05059814453125, + "objective/train/weight_avg": 1.0002082586288452, + "objective/train/weighted_lm_loss": 3.08032488822937, + "objective/train/weights_max": 1.0494253635406494, + "objective/train/weights_min": 0.9198306202888489, + "theoretical_loss": 3.592661226857496, + "tokens_seen": 1181286400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006484751203852327, + "loss": 1.5606, + "theoretical_loss": 3.5925325388368656, + "tokens_seen": 1181745152 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006481540930979132, + "loss": 1.5809, + "theoretical_loss": 3.5922386347566695, + "tokens_seen": 1182793728 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.005119468085467815, + "objective/train/docs_used": 673021, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.079346179962158, + "objective/train/original_loss": 3.0793466567993164, + "objective/train/theoretical_loss": 3.592201920196959, + "objective/train/tokens_used": 1203384800, + "objective/train/value_avg": -0.0170745849609375, + "objective/train/value_loss": 0.004014984704554081, + "objective/train/value_max": -0.0006165504455566406, + "objective/train/value_min": -0.8525390625, + "objective/train/value_reward_corr": 0.3716597992815951, + "objective/train/value_std": 0.025238037109375, + "objective/train/weight_avg": 1.000531554222107, + "objective/train/weighted_lm_loss": 3.080808401107788, + "objective/train/weights_max": 1.0479094982147217, + "objective/train/weights_min": 0.9112878441810608, + "theoretical_loss": 3.592201920196959, + "tokens_seen": 1182924800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006478330658105938, + "loss": 1.5552, + "theoretical_loss": 3.591945063995568, + "tokens_seen": 1183842304 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.003847569227218628, + "objective/train/docs_used": 674157, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4364638328552246, + "objective/train/original_loss": 3.436464309692383, + "objective/train/theoretical_loss": 3.5917434270987485, + "objective/train/tokens_used": 1205023200, + "objective/train/value_avg": -0.0174102783203125, + "objective/train/value_loss": 0.0028073291759938, + "objective/train/value_max": -0.0006823539733886719, + "objective/train/value_min": -0.7568359375, + "objective/train/value_reward_corr": 0.5387287052951452, + "objective/train/value_std": 0.03045654296875, + "objective/train/weight_avg": 1.0003986358642578, + "objective/train/weighted_lm_loss": 3.4376413822174072, + "objective/train/weights_max": 1.043826937675476, + "objective/train/weights_min": 0.9115138053894043, + "theoretical_loss": 3.5917434270987485, + "tokens_seen": 1184563200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006475120385232745, + "loss": 1.5755, + "theoretical_loss": 3.591651825880809, + "tokens_seen": 1184890880 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006471910112359551, + "loss": 1.5804, + "theoretical_loss": 3.591358919741592, + "tokens_seen": 1185939456 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0031490870751440525, + "objective/train/docs_used": 674881, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.742790699005127, + "objective/train/original_loss": 2.742790699005127, + "objective/train/theoretical_loss": 3.591285744999542, + "objective/train/tokens_used": 1206661600, + "objective/train/value_avg": -0.0167083740234375, + "objective/train/value_loss": 0.000886575086042285, + "objective/train/value_max": -0.0007643699645996094, + "objective/train/value_min": -0.28857421875, + "objective/train/value_reward_corr": 0.6688405221843289, + "objective/train/value_std": 0.019287109375, + "objective/train/weight_avg": 1.0003193616867065, + "objective/train/weighted_lm_loss": 2.7445216178894043, + "objective/train/weights_max": 1.020230770111084, + "objective/train/weights_min": 0.9195778369903564, + "theoretical_loss": 3.591285744999542, + "tokens_seen": 1186201600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006468699839486356, + "loss": 1.5616, + "theoretical_loss": 3.591066344909062, + "tokens_seen": 1186988032 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0028864084742963314, + "objective/train/docs_used": 676005, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1933655738830566, + "objective/train/original_loss": 3.1933658123016357, + "objective/train/theoretical_loss": 3.5908288713476195, + "objective/train/tokens_used": 1208300000, + "objective/train/value_avg": -0.020782470703125, + "objective/train/value_loss": 0.006204212550073862, + "objective/train/value_max": -0.00042724609375, + "objective/train/value_min": -0.78076171875, + "objective/train/value_reward_corr": 0.450501245783913, + "objective/train/value_std": 0.042327880859375, + "objective/train/weight_avg": 1.0003191232681274, + "objective/train/weighted_lm_loss": 3.1942288875579834, + "objective/train/weights_max": 1.0586578845977783, + "objective/train/weights_min": 0.9075598120689392, + "theoretical_loss": 3.5908288713476195, + "tokens_seen": 1187840000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006465489566613162, + "loss": 1.5534, + "theoretical_loss": 3.590774100716298, + "tokens_seen": 1188036608 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006462279293739968, + "loss": 1.5934, + "theoretical_loss": 3.5904821864983116, + "tokens_seen": 1189085184 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0009410595521330833, + "objective/train/docs_used": 676669, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.365514039993286, + "objective/train/original_loss": 3.365514039993286, + "objective/train/theoretical_loss": 3.590372803602795, + "objective/train/tokens_used": 1209938400, + "objective/train/value_avg": -0.01476287841796875, + "objective/train/value_loss": 0.002417605835944414, + "objective/train/value_max": -0.0006718635559082031, + "objective/train/value_min": -0.6748046875, + "objective/train/value_reward_corr": 0.4921121734077255, + "objective/train/value_std": 0.019195556640625, + "objective/train/weight_avg": 1.0001059770584106, + "objective/train/weighted_lm_loss": 3.3663880825042725, + "objective/train/weights_max": 1.0220332145690918, + "objective/train/weights_min": 0.9347859025001526, + "theoretical_loss": 3.590372803602795, + "tokens_seen": 1189478400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006459069020866774, + "loss": 1.5799, + "theoretical_loss": 3.5901906015920355, + "tokens_seen": 1190133760 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": -0.0036867731250822544, + "objective/train/docs_used": 677774, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5493600368499756, + "objective/train/original_loss": 2.5493597984313965, + "objective/train/theoretical_loss": 3.5899175392363483, + "objective/train/tokens_used": 1211576800, + "objective/train/value_avg": -0.021514892578125, + "objective/train/value_loss": 0.0071929446421563625, + "objective/train/value_max": -0.0006337165832519531, + "objective/train/value_min": -0.92578125, + "objective/train/value_reward_corr": 0.6976053147136617, + "objective/train/value_std": 0.055511474609375, + "objective/train/weight_avg": 0.9996665716171265, + "objective/train/weighted_lm_loss": 2.549388885498047, + "objective/train/weights_max": 1.047416090965271, + "objective/train/weights_min": 0.9085037112236023, + "theoretical_loss": 3.5899175392363483, + "tokens_seen": 1191116800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006455858747993579, + "loss": 1.5399, + "theoretical_loss": 3.5898993453363173, + "tokens_seen": 1191182336 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006452648475120385, + "loss": 1.5604, + "theoretical_loss": 3.5896084170719127, + "tokens_seen": 1192230912 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.009028124623000622, + "objective/train/docs_used": 678420, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0344460010528564, + "objective/train/original_loss": 3.0344464778900146, + "objective/train/theoretical_loss": 3.589463075730959, + "objective/train/tokens_used": 1213215200, + "objective/train/value_avg": -0.0221099853515625, + "objective/train/value_loss": 0.002002796158194542, + "objective/train/value_max": -0.0006985664367675781, + "objective/train/value_min": -0.405029296875, + "objective/train/value_reward_corr": 0.3084282695008862, + "objective/train/value_std": 0.02337646484375, + "objective/train/weight_avg": 1.0009126663208008, + "objective/train/weighted_lm_loss": 3.0366616249084473, + "objective/train/weights_max": 1.028374433517456, + "objective/train/weights_min": 0.9259040355682373, + "theoretical_loss": 3.589463075730959, + "tokens_seen": 1192755200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006449438202247191, + "loss": 1.5763, + "theoretical_loss": 3.5893178161414783, + "tokens_seen": 1193279488 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006446227929373996, + "loss": 1.5333, + "theoretical_loss": 3.589027541889564, + "tokens_seen": 1194328064 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.004393371753394604, + "objective/train/docs_used": 679445, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.769395589828491, + "objective/train/original_loss": 2.7693958282470703, + "objective/train/theoretical_loss": 3.5890094105806383, + "objective/train/tokens_used": 1214853600, + "objective/train/value_avg": -0.01371002197265625, + "objective/train/value_loss": 0.0020377906039357185, + "objective/train/value_max": -0.0004916191101074219, + "objective/train/value_min": -0.33740234375, + "objective/train/value_reward_corr": 0.2264829478548479, + "objective/train/value_std": 0.014617919921875, + "objective/train/weight_avg": 1.0004494190216064, + "objective/train/weighted_lm_loss": 2.7700347900390625, + "objective/train/weights_max": 1.0328521728515625, + "objective/train/weights_min": 0.9360846281051636, + "theoretical_loss": 3.5890094105806383, + "tokens_seen": 1194393600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006443017656500802, + "loss": 1.5435, + "theoretical_loss": 3.5887375936626067, + "tokens_seen": 1195376640 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.004315066151320934, + "objective/train/docs_used": 680084, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2691123485565186, + "objective/train/original_loss": 3.2691123485565186, + "objective/train/theoretical_loss": 3.5885565412906617, + "objective/train/tokens_used": 1216492000, + "objective/train/value_avg": -0.0234222412109375, + "objective/train/value_loss": 0.0042769634164869785, + "objective/train/value_max": -0.0007734298706054688, + "objective/train/value_min": -0.5341796875, + "objective/train/value_reward_corr": 0.4880884838233496, + "objective/train/value_std": 0.033782958984375, + "objective/train/weight_avg": 1.0004526376724243, + "objective/train/weighted_lm_loss": 3.270905017852783, + "objective/train/weights_max": 1.0414866209030151, + "objective/train/weights_min": 0.9179091453552246, + "theoretical_loss": 3.5885565412906617, + "tokens_seen": 1196032000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006439807383627608, + "loss": 1.5845, + "theoretical_loss": 3.5884479708089216, + "tokens_seen": 1196425216 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006436597110754415, + "loss": 1.5824, + "theoretical_loss": 3.5881586726786976, + "tokens_seen": 1197473792 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.005574420094490051, + "objective/train/docs_used": 681502, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.105259656906128, + "objective/train/original_loss": 3.105259656906128, + "objective/train/theoretical_loss": 3.5881044653775054, + "objective/train/tokens_used": 1218130400, + "objective/train/value_avg": -0.01413726806640625, + "objective/train/value_loss": 0.0021137914154678583, + "objective/train/value_max": -0.0007319450378417969, + "objective/train/value_min": -0.271728515625, + "objective/train/value_reward_corr": 0.14921227803170695, + "objective/train/value_std": 0.0154571533203125, + "objective/train/weight_avg": 1.00056791305542, + "objective/train/weighted_lm_loss": 3.1067707538604736, + "objective/train/weights_max": 1.0259202718734741, + "objective/train/weights_min": 0.9276067018508911, + "theoretical_loss": 3.5881044653775054, + "tokens_seen": 1197670400 + }, + { + "epoch": 0.36, + "learning_rate": 0.000643338683788122, + "loss": 1.5477, + "theoretical_loss": 3.587869698623987, + "tokens_seen": 1198522368 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": -1.1826132322312333e-05, + "objective/train/docs_used": 682215, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3725080490112305, + "objective/train/original_loss": 3.3725078105926514, + "objective/train/theoretical_loss": 3.5876531803687786, + "objective/train/tokens_used": 1219768800, + "objective/train/value_avg": -0.015655517578125, + "objective/train/value_loss": 0.007658195681869984, + "objective/train/value_max": -0.0008797645568847656, + "objective/train/value_min": -0.90966796875, + "objective/train/value_reward_corr": 0.501470221050105, + "objective/train/value_std": 0.0299072265625, + "objective/train/weight_avg": 1.0000362396240234, + "objective/train/weighted_lm_loss": 3.371187210083008, + "objective/train/weights_max": 1.0356013774871826, + "objective/train/weights_min": 0.9088230729103088, + "theoretical_loss": 3.5876531803687786, + "tokens_seen": 1199308800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006430176565008026, + "loss": 1.546, + "theoretical_loss": 3.587581047998703, + "tokens_seen": 1199570944 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006426966292134832, + "loss": 1.539, + "theoretical_loss": 3.587292720158608, + "tokens_seen": 1200619520 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": -0.003067591693252325, + "objective/train/docs_used": 683386, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.277570962905884, + "objective/train/original_loss": 3.277570962905884, + "objective/train/theoretical_loss": 3.5872026838031594, + "objective/train/tokens_used": 1221407200, + "objective/train/value_avg": -0.0286712646484375, + "objective/train/value_loss": 0.013289807364344597, + "objective/train/value_max": -0.000690460205078125, + "objective/train/value_min": -0.9951171875, + "objective/train/value_reward_corr": 0.5432806193718683, + "objective/train/value_std": 0.06854248046875, + "objective/train/weight_avg": 0.9997585415840149, + "objective/train/weighted_lm_loss": 3.278656244277954, + "objective/train/weights_max": 1.0860530138015747, + "objective/train/weights_min": 0.9071435332298279, + "theoretical_loss": 3.5872026838031594, + "tokens_seen": 1200947200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006423756019261637, + "loss": 1.5424, + "theoretical_loss": 3.58700471446131, + "tokens_seen": 1201668096 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0008176976698450744, + "objective/train/docs_used": 683937, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.237710952758789, + "objective/train/original_loss": 3.2377114295959473, + "objective/train/theoretical_loss": 3.5867529732303307, + "objective/train/tokens_used": 1223045600, + "objective/train/value_avg": -0.0162200927734375, + "objective/train/value_loss": 0.0024968020152300596, + "objective/train/value_max": -0.00036978721618652344, + "objective/train/value_min": -0.5185546875, + "objective/train/value_reward_corr": 0.3092676212831545, + "objective/train/value_std": 0.024993896484375, + "objective/train/weight_avg": 1.0000941753387451, + "objective/train/weighted_lm_loss": 3.238574266433716, + "objective/train/weights_max": 1.0445295572280884, + "objective/train/weights_min": 0.9290300607681274, + "theoretical_loss": 3.5867529732303307, + "tokens_seen": 1202585600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006420545746388443, + "loss": 1.5842, + "theoretical_loss": 3.5867170302662537, + "tokens_seen": 1202716672 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006417335473515249, + "loss": 1.5834, + "theoretical_loss": 3.586429666934716, + "tokens_seen": 1203765248 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0016596200875937939, + "objective/train/docs_used": 684175, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.400723695755005, + "objective/train/original_loss": 3.4007232189178467, + "objective/train/theoretical_loss": 3.5863040462109153, + "objective/train/tokens_used": 1224684000, + "objective/train/value_avg": -0.013885498046875, + "objective/train/value_loss": 0.0012326549040153623, + "objective/train/value_max": -0.00044083595275878906, + "objective/train/value_min": -0.96044921875, + "objective/train/value_reward_corr": 0.28829905691520336, + "objective/train/value_std": 0.01885986328125, + "objective/train/weight_avg": 1.0001720190048218, + "objective/train/weighted_lm_loss": 3.4025895595550537, + "objective/train/weights_max": 1.0948817729949951, + "objective/train/weights_min": 0.9115330576896667, + "theoretical_loss": 3.5863040462109153, + "tokens_seen": 1204224000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006414125200642055, + "loss": 1.5973, + "theoretical_loss": 3.5861426238297964, + "tokens_seen": 1204813824 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.005613624583929777, + "objective/train/docs_used": 684684, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6422274112701416, + "objective/train/original_loss": 3.6422274112701416, + "objective/train/theoretical_loss": 3.585855900316411, + "objective/train/tokens_used": 1226322400, + "objective/train/value_avg": -0.0236053466796875, + "objective/train/value_loss": 0.004418630618602037, + "objective/train/value_max": -0.0006694793701171875, + "objective/train/value_min": -0.76904296875, + "objective/train/value_reward_corr": 0.5602295131805738, + "objective/train/value_std": 0.0386962890625, + "objective/train/weight_avg": 1.000583291053772, + "objective/train/weighted_lm_loss": 3.644181966781616, + "objective/train/weights_max": 1.0434855222702026, + "objective/train/weights_min": 0.9238704442977905, + "theoretical_loss": 3.585855900316411, + "tokens_seen": 1205862400 + }, + { + "epoch": 0.37, + "learning_rate": 0.000641091492776886, + "loss": 1.5968, + "theoretical_loss": 3.585855900316411, + "tokens_seen": 1205862400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006407704654895666, + "loss": 1.6091, + "theoretical_loss": 3.5855694957612894, + "tokens_seen": 1206910976 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.006207867059856653, + "objective/train/docs_used": 686035, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.353410482406616, + "objective/train/original_loss": 3.353410243988037, + "objective/train/theoretical_loss": 3.585408533129132, + "objective/train/tokens_used": 1227960800, + "objective/train/value_avg": -0.0226287841796875, + "objective/train/value_loss": 0.003391645848751068, + "objective/train/value_max": -0.0005636215209960938, + "objective/train/value_min": -0.89306640625, + "objective/train/value_reward_corr": 0.5169708473232523, + "objective/train/value_std": 0.0364990234375, + "objective/train/weight_avg": 1.0006375312805176, + "objective/train/weighted_lm_loss": 3.3552699089050293, + "objective/train/weights_max": 1.076270341873169, + "objective/train/weights_min": 0.9073458313941956, + "theoretical_loss": 3.585408533129132, + "tokens_seen": 1207500800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006404494382022472, + "loss": 1.6145, + "theoretical_loss": 3.58528340953296, + "tokens_seen": 1207959552 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006401284109149277, + "loss": 1.6029, + "theoretical_loss": 3.5849976410017526, + "tokens_seen": 1209008128 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.006658635102212429, + "objective/train/docs_used": 686549, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.313758373260498, + "objective/train/original_loss": 3.313758611679077, + "objective/train/theoretical_loss": 3.5849619422421393, + "objective/train/tokens_used": 1229599200, + "objective/train/value_avg": -0.0167083740234375, + "objective/train/value_loss": 0.0010053300065919757, + "objective/train/value_max": -0.0007948875427246094, + "objective/train/value_min": -0.360107421875, + "objective/train/value_reward_corr": 0.5062625714040959, + "objective/train/value_std": 0.021026611328125, + "objective/train/weight_avg": 1.0006707906723022, + "objective/train/weighted_lm_loss": 3.3159866333007812, + "objective/train/weights_max": 1.029321551322937, + "objective/train/weights_min": 0.9697403311729431, + "theoretical_loss": 3.5849619422421393, + "tokens_seen": 1209139200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006398073836276084, + "loss": 1.592, + "theoretical_loss": 3.5847121895397844, + "tokens_seen": 1210056704 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -6.035602564224973e-05, + "objective/train/docs_used": 687794, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.08864426612854, + "objective/train/original_loss": 3.088644027709961, + "objective/train/theoretical_loss": 3.5845161252591855, + "objective/train/tokens_used": 1231237600, + "objective/train/value_avg": -0.0173797607421875, + "objective/train/value_loss": 0.003531258786097169, + "objective/train/value_max": -0.0005860328674316406, + "objective/train/value_min": -0.783203125, + "objective/train/value_reward_corr": 0.45372742234859653, + "objective/train/value_std": 0.026336669921875, + "objective/train/weight_avg": 1.0000114440917969, + "objective/train/weighted_lm_loss": 3.088895559310913, + "objective/train/weights_max": 1.037450909614563, + "objective/train/weights_min": 0.9160937070846558, + "theoretical_loss": 3.5845161252591855, + "tokens_seen": 1210777600 + }, + { + "epoch": 0.37, + "learning_rate": 0.000639486356340289, + "loss": 1.6109, + "theoretical_loss": 3.5844270545209582, + "tokens_seen": 1211105280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006391653290529696, + "loss": 1.5926, + "theoretical_loss": 3.584142235320952, + "tokens_seen": 1212153856 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0055386521853506565, + "objective/train/docs_used": 688453, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6586296558380127, + "objective/train/original_loss": 3.6586294174194336, + "objective/train/theoretical_loss": 3.584071079794647, + "objective/train/tokens_used": 1232876000, + "objective/train/value_avg": -0.01361083984375, + "objective/train/value_loss": 0.0018196371383965015, + "objective/train/value_max": -0.000507354736328125, + "objective/train/value_min": -0.58740234375, + "objective/train/value_reward_corr": 0.39342961390072606, + "objective/train/value_std": 0.02142333984375, + "objective/train/weight_avg": 1.0005629062652588, + "objective/train/weighted_lm_loss": 3.6602439880371094, + "objective/train/weights_max": 1.0399158000946045, + "objective/train/weights_min": 0.9377127289772034, + "theoretical_loss": 3.584071079794647, + "tokens_seen": 1212416000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006388443017656501, + "loss": 1.6023, + "theoretical_loss": 3.5838577313172157, + "tokens_seen": 1213202432 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0067305355332791805, + "objective/train/docs_used": 689723, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1107189655303955, + "objective/train/original_loss": 3.1107194423675537, + "objective/train/theoretical_loss": 3.583626803473467, + "objective/train/tokens_used": 1234514400, + "objective/train/value_avg": -0.0185089111328125, + "objective/train/value_loss": 0.0031198232900351286, + "objective/train/value_max": -0.0006537437438964844, + "objective/train/value_min": -0.78369140625, + "objective/train/value_reward_corr": 0.2912547870474801, + "objective/train/value_std": 0.0258636474609375, + "objective/train/weight_avg": 1.0006884336471558, + "objective/train/weighted_lm_loss": 3.112713098526001, + "objective/train/weights_max": 1.0789165496826172, + "objective/train/weights_min": 0.9077731370925903, + "theoretical_loss": 3.583626803473467, + "tokens_seen": 1214054400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006385232744783307, + "loss": 1.5875, + "theoretical_loss": 3.5835735418889616, + "tokens_seen": 1214251008 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006382022471910113, + "loss": 1.5674, + "theoretical_loss": 3.583289666417161, + "tokens_seen": 1215299584 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0027805552817881107, + "objective/train/docs_used": 690230, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3780670166015625, + "objective/train/original_loss": 3.3780672550201416, + "objective/train/theoretical_loss": 3.583183293931091, + "objective/train/tokens_used": 1236152800, + "objective/train/value_avg": -0.017242431640625, + "objective/train/value_loss": 0.0035327356308698654, + "objective/train/value_max": -0.0005793571472167969, + "objective/train/value_min": -0.50341796875, + "objective/train/value_reward_corr": 0.31096934648828173, + "objective/train/value_std": 0.0247344970703125, + "objective/train/weight_avg": 1.0002955198287964, + "objective/train/weighted_lm_loss": 3.3777854442596436, + "objective/train/weights_max": 1.0430200099945068, + "objective/train/weights_min": 0.9221920371055603, + "theoretical_loss": 3.583183293931091, + "tokens_seen": 1215692800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006378812199036919, + "loss": 1.5654, + "theoretical_loss": 3.5830061042845363, + "tokens_seen": 1216348160 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -0.02640485018491745, + "objective/train/docs_used": 691368, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.441702127456665, + "objective/train/original_loss": 3.4417014122009277, + "objective/train/theoretical_loss": 3.58274054881341, + "objective/train/tokens_used": 1237791200, + "objective/train/value_avg": -0.0210723876953125, + "objective/train/value_loss": 0.023640312254428864, + "objective/train/value_max": -0.00033283233642578125, + "objective/train/value_min": -0.970703125, + "objective/train/value_reward_corr": 0.44078606070727766, + "objective/train/value_std": 0.034698486328125, + "objective/train/weight_avg": 0.9974755644798279, + "objective/train/weighted_lm_loss": 3.4390933513641357, + "objective/train/weights_max": 1.0787512063980103, + "objective/train/weights_min": 0.9129968285560608, + "theoretical_loss": 3.58274054881341, + "tokens_seen": 1217331200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006375601926163724, + "loss": 1.5843, + "theoretical_loss": 3.582722854875552, + "tokens_seen": 1217396736 + }, + { + "epoch": 0.37, + "learning_rate": 0.000637239165329053, + "loss": 1.5588, + "theoretical_loss": 3.5824399175764126, + "tokens_seen": 1218445312 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -0.0030323921237140894, + "objective/train/docs_used": 692060, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.246213912963867, + "objective/train/original_loss": 3.2462141513824463, + "objective/train/theoretical_loss": 3.5822985657766973, + "objective/train/tokens_used": 1239429600, + "objective/train/value_avg": -0.0176544189453125, + "objective/train/value_loss": 0.003978956025093794, + "objective/train/value_max": -0.0006022453308105469, + "objective/train/value_min": -0.943359375, + "objective/train/value_reward_corr": 0.5260957883740961, + "objective/train/value_std": 0.0297088623046875, + "objective/train/weight_avg": 0.9997163414955139, + "objective/train/weighted_lm_loss": 3.2463860511779785, + "objective/train/weights_max": 1.0387128591537476, + "objective/train/weights_min": 0.9142448306083679, + "theoretical_loss": 3.5822985657766973, + "tokens_seen": 1218969600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006369181380417336, + "loss": 1.5713, + "theoretical_loss": 3.5821572917750535, + "tokens_seen": 1219493888 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006365971107544141, + "loss": 1.5696, + "theoretical_loss": 3.5818749768611364, + "tokens_seen": 1220542464 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0070277852937579155, + "objective/train/docs_used": 693613, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.844902276992798, + "objective/train/original_loss": 2.844902515411377, + "objective/train/theoretical_loss": 3.581857342487549, + "objective/train/tokens_used": 1241068000, + "objective/train/value_avg": -0.015869140625, + "objective/train/value_loss": 0.0022672894410789013, + "objective/train/value_max": -0.000614166259765625, + "objective/train/value_min": -0.86767578125, + "objective/train/value_reward_corr": 0.3625419996373638, + "objective/train/value_std": 0.0198822021484375, + "objective/train/weight_avg": 1.0007139444351196, + "objective/train/weighted_lm_loss": 2.846351385116577, + "objective/train/weights_max": 1.024009108543396, + "objective/train/weights_min": 0.9064587354660034, + "theoretical_loss": 3.581857342487549, + "tokens_seen": 1220608000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006362760834670947, + "loss": 1.57, + "theoretical_loss": 3.5815929722260402, + "tokens_seen": 1221591040 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.008467234671115875, + "objective/train/docs_used": 694027, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4216129779815674, + "objective/train/original_loss": 3.4216129779815674, + "objective/train/theoretical_loss": 3.5814168766228267, + "objective/train/tokens_used": 1242706400, + "objective/train/value_avg": -0.0191650390625, + "objective/train/value_loss": 0.0034279702231287956, + "objective/train/value_max": -0.0006022453308105469, + "objective/train/value_min": -0.328857421875, + "objective/train/value_reward_corr": 0.2348651855540066, + "objective/train/value_std": 0.027984619140625, + "objective/train/weight_avg": 1.0008636713027954, + "objective/train/weighted_lm_loss": 3.424748420715332, + "objective/train/weights_max": 1.030118703842163, + "objective/train/weights_min": 0.9318027496337891, + "theoretical_loss": 3.5814168766228267, + "tokens_seen": 1222246400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006359550561797754, + "loss": 1.5801, + "theoretical_loss": 3.5813112772628575, + "tokens_seen": 1222639616 + }, + { + "epoch": 0.37, + "learning_rate": 0.000635634028892456, + "loss": 1.5804, + "theoretical_loss": 3.581029891366387, + "tokens_seen": 1223688192 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.00044351094402372837, + "objective/train/docs_used": 695222, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1835641860961914, + "objective/train/original_loss": 3.183563709259033, + "objective/train/theoretical_loss": 3.580977165869598, + "objective/train/tokens_used": 1244344800, + "objective/train/value_avg": -0.01393890380859375, + "objective/train/value_loss": 0.005784416105598211, + "objective/train/value_max": -0.000820159912109375, + "objective/train/value_min": -0.681640625, + "objective/train/value_reward_corr": 0.39026872497583653, + "objective/train/value_std": 0.0257568359375, + "objective/train/weight_avg": 1.0000725984573364, + "objective/train/weighted_lm_loss": 3.1830315589904785, + "objective/train/weights_max": 1.046161413192749, + "objective/train/weights_min": 0.9102701544761658, + "theoretical_loss": 3.580977165869598, + "tokens_seen": 1223884800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006353130016051365, + "loss": 1.5598, + "theoretical_loss": 3.5807488139331274, + "tokens_seen": 1224736768 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -0.02231050282716751, + "objective/train/docs_used": 695595, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.4423232078552246, + "objective/train/original_loss": 2.4423234462738037, + "objective/train/theoretical_loss": 3.580538207925077, + "objective/train/tokens_used": 1245983200, + "objective/train/value_avg": -0.0170440673828125, + "objective/train/value_loss": 0.010890482924878597, + "objective/train/value_max": -0.0006022453308105469, + "objective/train/value_min": -0.70849609375, + "objective/train/value_reward_corr": 0.4143678878827061, + "objective/train/value_std": 0.0273284912109375, + "objective/train/weight_avg": 0.997822642326355, + "objective/train/weighted_lm_loss": 2.4379377365112305, + "objective/train/weights_max": 1.0425090789794922, + "objective/train/weights_min": 0.9073599576950073, + "theoretical_loss": 3.580538207925077, + "tokens_seen": 1225523200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006349919743178171, + "loss": 1.5687, + "theoretical_loss": 3.5804680443612718, + "tokens_seen": 1225785344 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006346709470304977, + "loss": 1.5569, + "theoretical_loss": 3.5801875820506988, + "tokens_seen": 1226833920 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -0.002137209288775921, + "objective/train/docs_used": 696474, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7186007499694824, + "objective/train/original_loss": 3.7186007499694824, + "objective/train/theoretical_loss": 3.580100000496566, + "objective/train/tokens_used": 1247621600, + "objective/train/value_avg": -0.0174560546875, + "objective/train/value_loss": 0.004006453789770603, + "objective/train/value_max": -0.0005702972412109375, + "objective/train/value_min": -0.59912109375, + "objective/train/value_reward_corr": 0.4888195436857566, + "objective/train/value_std": 0.0223388671875, + "objective/train/weight_avg": 0.9998059868812561, + "objective/train/weighted_lm_loss": 3.718759536743164, + "objective/train/weights_max": 1.0287156105041504, + "objective/train/weights_min": 0.9155946373939514, + "theoretical_loss": 3.580100000496566, + "tokens_seen": 1227161600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006343499197431782, + "loss": 1.5676, + "theoretical_loss": 3.579907426402972, + "tokens_seen": 1227882496 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.011675610207021236, + "objective/train/docs_used": 696846, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.869650363922119, + "objective/train/original_loss": 2.869649887084961, + "objective/train/theoretical_loss": 3.579662541301401, + "objective/train/tokens_used": 1249260000, + "objective/train/value_avg": -0.0253753662109375, + "objective/train/value_loss": 0.0014087604358792305, + "objective/train/value_max": -0.0007886886596679688, + "objective/train/value_min": -0.65478515625, + "objective/train/value_reward_corr": 0.4779846488409249, + "objective/train/value_std": 0.032684326171875, + "objective/train/weight_avg": 1.0011746883392334, + "objective/train/weighted_lm_loss": 2.873872995376587, + "objective/train/weights_max": 1.0366755723953247, + "objective/train/weights_min": 0.9413391947746277, + "theoretical_loss": 3.579662541301401, + "tokens_seen": 1228800000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006340288924558587, + "loss": 1.5788, + "theoretical_loss": 3.579627576821328, + "tokens_seen": 1228931072 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006337078651685393, + "loss": 1.559, + "theoretical_loss": 3.579348032710672, + "tokens_seen": 1229979648 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.00812232494354248, + "objective/train/docs_used": 698095, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4857265949249268, + "objective/train/original_loss": 3.4857263565063477, + "objective/train/theoretical_loss": 3.5792258280668903, + "objective/train/tokens_used": 1250898400, + "objective/train/value_avg": -0.01361846923828125, + "objective/train/value_loss": 0.0017856706399470568, + "objective/train/value_max": -0.0004954338073730469, + "objective/train/value_min": -0.89013671875, + "objective/train/value_reward_corr": 0.28026082074047853, + "objective/train/value_std": 0.0275115966796875, + "objective/train/weight_avg": 1.0008211135864258, + "objective/train/weighted_lm_loss": 3.487849712371826, + "objective/train/weights_max": 1.0590230226516724, + "objective/train/weights_min": 0.9098955392837524, + "theoretical_loss": 3.5792258280668903, + "tokens_seen": 1230438400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006333868378812199, + "loss": 1.5908, + "theoretical_loss": 3.5790687934775747, + "tokens_seen": 1231028224 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.006315195932984352, + "objective/train/docs_used": 698724, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0727009773254395, + "objective/train/original_loss": 3.0727007389068604, + "objective/train/theoretical_loss": 3.5787898585302615, + "objective/train/tokens_used": 1252536800, + "objective/train/value_avg": -0.027374267578125, + "objective/train/value_loss": 0.0036822997499257326, + "objective/train/value_max": -0.0008559226989746094, + "objective/train/value_min": -0.953125, + "objective/train/value_reward_corr": 0.7239612766820581, + "objective/train/value_std": 0.054718017578125, + "objective/train/weight_avg": 1.0006496906280518, + "objective/train/weighted_lm_loss": 3.0754942893981934, + "objective/train/weights_max": 1.0724139213562012, + "objective/train/weights_min": 0.9098600149154663, + "theoretical_loss": 3.5787898585302615, + "tokens_seen": 1232076800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006330658105939004, + "loss": 1.5609, + "theoretical_loss": 3.5787898585302615, + "tokens_seen": 1232076800 + }, + { + "epoch": 0.37, + "learning_rate": 0.000632744783306581, + "loss": 1.562, + "theoretical_loss": 3.57851122727861, + "tokens_seen": 1233125376 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0025057708844542503, + "objective/train/docs_used": 699861, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2136852741241455, + "objective/train/original_loss": 3.2136855125427246, + "objective/train/theoretical_loss": 3.578354630438602, + "objective/train/tokens_used": 1254175200, + "objective/train/value_avg": -0.0119171142578125, + "objective/train/value_loss": 0.0015136059373617172, + "objective/train/value_max": -0.0002913475036621094, + "objective/train/value_min": -0.2437744140625, + "objective/train/value_reward_corr": 0.26162491962787304, + "objective/train/value_std": 0.0142059326171875, + "objective/train/weight_avg": 1.0002580881118774, + "objective/train/weighted_lm_loss": 3.214472770690918, + "objective/train/weights_max": 1.023878574371338, + "objective/train/weights_min": 0.9337027668952942, + "theoretical_loss": 3.578354630438602, + "tokens_seen": 1233715200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006324237560192616, + "loss": 1.577, + "theoretical_loss": 3.578232899134143, + "tokens_seen": 1234173952 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006321027287319421, + "loss": 1.5529, + "theoretical_loss": 3.5779548735100217, + "tokens_seen": 1235222528 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.007695542182773352, + "objective/train/docs_used": 700550, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.394077777862549, + "objective/train/original_loss": 3.3940775394439697, + "objective/train/theoretical_loss": 3.577920141548805, + "objective/train/tokens_used": 1255813600, + "objective/train/value_avg": -0.0176849365234375, + "objective/train/value_loss": 0.003291845554485917, + "objective/train/value_max": -0.0005745887756347656, + "objective/train/value_min": -0.93359375, + "objective/train/value_reward_corr": 0.39941502530153306, + "objective/train/value_std": 0.029937744140625, + "objective/train/weight_avg": 1.0007857084274292, + "objective/train/weighted_lm_loss": 3.396925926208496, + "objective/train/weights_max": 1.0690255165100098, + "objective/train/weights_min": 0.9090920686721802, + "theoretical_loss": 3.577920141548805, + "tokens_seen": 1235353600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006317817014446228, + "loss": 1.5814, + "theoretical_loss": 3.5776771498210413, + "tokens_seen": 1236271104 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0043516457080841064, + "objective/train/docs_used": 701918, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8490347862243652, + "objective/train/original_loss": 2.8490352630615234, + "objective/train/theoretical_loss": 3.577486389627513, + "objective/train/tokens_used": 1257452000, + "objective/train/value_avg": -0.01129150390625, + "objective/train/value_loss": 0.0015278132632374763, + "objective/train/value_max": -0.0005955696105957031, + "objective/train/value_min": -0.34765625, + "objective/train/value_reward_corr": 0.35059204729615545, + "objective/train/value_std": 0.01555633544921875, + "objective/train/weight_avg": 1.0004427433013916, + "objective/train/weighted_lm_loss": 2.8498871326446533, + "objective/train/weights_max": 1.0336034297943115, + "objective/train/weights_min": 0.9215505123138428, + "theoretical_loss": 3.577486389627513, + "tokens_seen": 1236992000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006314606741573034, + "loss": 1.5555, + "theoretical_loss": 3.5773997274836224, + "tokens_seen": 1237319680 + }, + { + "epoch": 0.38, + "learning_rate": 0.000631139646869984, + "loss": 1.5484, + "theoretical_loss": 3.577122605915809, + "tokens_seen": 1238368256 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": -0.0019931539427489042, + "objective/train/docs_used": 702757, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.846012830734253, + "objective/train/original_loss": 2.846012830734253, + "objective/train/theoretical_loss": 3.5770533724510627, + "objective/train/tokens_used": 1259090400, + "objective/train/value_avg": -0.0214080810546875, + "objective/train/value_loss": 0.002489727223291993, + "objective/train/value_max": -0.0003418922424316406, + "objective/train/value_min": -0.43505859375, + "objective/train/value_reward_corr": 0.7947948411056658, + "objective/train/value_std": 0.04229736328125, + "objective/train/weight_avg": 0.9998130202293396, + "objective/train/weighted_lm_loss": 2.846713066101074, + "objective/train/weights_max": 1.0412452220916748, + "objective/train/weights_min": 0.9365275502204895, + "theoretical_loss": 3.5770533724510627, + "tokens_seen": 1238630400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006308186195826645, + "loss": 1.5595, + "theoretical_loss": 3.5768457845372597, + "tokens_seen": 1239416832 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.00019837176660075784, + "objective/train/docs_used": 703958, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2027368545532227, + "objective/train/original_loss": 3.2027366161346436, + "objective/train/theoretical_loss": 3.5766210878054308, + "objective/train/tokens_used": 1260728800, + "objective/train/value_avg": -0.0174407958984375, + "objective/train/value_loss": 0.00481156213209033, + "objective/train/value_max": -0.0005173683166503906, + "objective/train/value_min": -0.8662109375, + "objective/train/value_reward_corr": 0.49853237632326297, + "objective/train/value_std": 0.0298004150390625, + "objective/train/weight_avg": 1.000043511390686, + "objective/train/weighted_lm_loss": 3.203795909881592, + "objective/train/weights_max": 1.0350595712661743, + "objective/train/weights_min": 0.916368842124939, + "theoretical_loss": 3.5766210878054308, + "tokens_seen": 1240268800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006304975922953451, + "loss": 1.5638, + "theoretical_loss": 3.576569262769242, + "tokens_seen": 1240465408 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006301765650080257, + "loss": 1.5799, + "theoretical_loss": 3.576293040034628, + "tokens_seen": 1241513984 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": -0.007936250418424606, + "objective/train/docs_used": 704662, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.389528512954712, + "objective/train/original_loss": 3.389528512954712, + "objective/train/theoretical_loss": 3.576189533486179, + "objective/train/tokens_used": 1262367200, + "objective/train/value_avg": -0.017242431640625, + "objective/train/value_loss": 0.012083721347153187, + "objective/train/value_max": -0.0005130767822265625, + "objective/train/value_min": -0.69677734375, + "objective/train/value_reward_corr": 0.29858406793727177, + "objective/train/value_std": 0.02813720703125, + "objective/train/weight_avg": 0.9992654323577881, + "objective/train/weighted_lm_loss": 3.3859939575195312, + "objective/train/weights_max": 1.0376167297363281, + "objective/train/weights_min": 0.9054518342018127, + "theoretical_loss": 3.576189533486179, + "tokens_seen": 1241907200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006298555377207062, + "loss": 1.5562, + "theoretical_loss": 3.576017115757886, + "tokens_seen": 1242562560 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.006743453908711672, + "objective/train/docs_used": 705833, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.222115993499756, + "objective/train/original_loss": 3.222115993499756, + "objective/train/theoretical_loss": 3.5757587072983994, + "objective/train/tokens_used": 1264005600, + "objective/train/value_avg": -0.01495361328125, + "objective/train/value_loss": 0.0018786455038934946, + "objective/train/value_max": -0.0007791519165039062, + "objective/train/value_min": -0.79736328125, + "objective/train/value_reward_corr": 0.3232248943974751, + "objective/train/value_std": 0.02008056640625, + "objective/train/weight_avg": 1.0006835460662842, + "objective/train/weighted_lm_loss": 3.2241406440734863, + "objective/train/weights_max": 1.0309020280838013, + "objective/train/weights_min": 0.9172037839889526, + "theoretical_loss": 3.5757587072983994, + "tokens_seen": 1243545600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006295345104333868, + "loss": 1.5541, + "theoretical_loss": 3.57574148936508, + "tokens_seen": 1243611136 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006292134831460674, + "loss": 1.5217, + "theoretical_loss": 3.575466160283857, + "tokens_seen": 1244659712 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": -0.000716366630513221, + "objective/train/docs_used": 706471, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.501105785369873, + "objective/train/original_loss": 3.501106023788452, + "objective/train/theoretical_loss": 3.5753286070566617, + "objective/train/tokens_used": 1265644000, + "objective/train/value_avg": -0.03106689453125, + "objective/train/value_loss": 0.007147008553147316, + "objective/train/value_max": -0.0002913475036621094, + "objective/train/value_min": -0.94287109375, + "objective/train/value_reward_corr": 0.4589938329940095, + "objective/train/value_std": 0.047149658203125, + "objective/train/weight_avg": 0.9999635815620422, + "objective/train/weighted_lm_loss": 3.501502275466919, + "objective/train/weights_max": 1.0796409845352173, + "objective/train/weights_min": 0.9235100746154785, + "theoretical_loss": 3.5753286070566617, + "tokens_seen": 1245184000 + }, + { + "epoch": 0.38, + "learning_rate": 0.000628892455858748, + "loss": 1.5408, + "theoretical_loss": 3.575191127943446, + "tokens_seen": 1245708288 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006285714285714285, + "loss": 1.5631, + "theoretical_loss": 3.574916391774651, + "tokens_seen": 1246756864 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0013293677475303411, + "objective/train/docs_used": 707914, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.4995932579040527, + "objective/train/original_loss": 2.499593734741211, + "objective/train/theoretical_loss": 3.574899230584959, + "objective/train/tokens_used": 1267282400, + "objective/train/value_avg": -0.0215911865234375, + "objective/train/value_loss": 0.0047282082960009575, + "objective/train/value_max": -0.0004878044128417969, + "objective/train/value_min": -0.677734375, + "objective/train/value_reward_corr": 0.40612298742460373, + "objective/train/value_std": 0.0316162109375, + "objective/train/weight_avg": 1.0001561641693115, + "objective/train/weighted_lm_loss": 2.5000455379486084, + "objective/train/weights_max": 1.0451871156692505, + "objective/train/weights_min": 0.9082080125808716, + "theoretical_loss": 3.574899230584959, + "tokens_seen": 1246822400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006282504012841091, + "loss": 1.5396, + "theoretical_loss": 3.5746419512098457, + "tokens_seen": 1247805440 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.009201113134622574, + "objective/train/docs_used": 708590, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0837409496307373, + "objective/train/original_loss": 3.0837411880493164, + "objective/train/theoretical_loss": 3.5744705757166564, + "objective/train/tokens_used": 1268920800, + "objective/train/value_avg": -0.014129638671875, + "objective/train/value_loss": 0.0010310726938769221, + "objective/train/value_max": -0.0005526542663574219, + "objective/train/value_min": -0.6640625, + "objective/train/value_reward_corr": 0.3868792183962921, + "objective/train/value_std": 0.0197296142578125, + "objective/train/weight_avg": 1.0009253025054932, + "objective/train/weighted_lm_loss": 3.0865190029144287, + "objective/train/weights_max": 1.0369690656661987, + "objective/train/weights_min": 0.9204509854316711, + "theoretical_loss": 3.5744705757166564, + "tokens_seen": 1248460800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006279293739967898, + "loss": 1.5504, + "theoretical_loss": 3.574367805682967, + "tokens_seen": 1248854016 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006276083467094704, + "loss": 1.5524, + "theoretical_loss": 3.57409395462951, + "tokens_seen": 1249902592 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": -0.0010412362171337008, + "objective/train/docs_used": 709757, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9127118587493896, + "objective/train/original_loss": 2.9127120971679688, + "objective/train/theoretical_loss": 3.5740426402944365, + "objective/train/tokens_used": 1270559200, + "objective/train/value_avg": -0.0256805419921875, + "objective/train/value_loss": 0.009791501797735691, + "objective/train/value_max": -0.0005550384521484375, + "objective/train/value_min": -0.9814453125, + "objective/train/value_reward_corr": 0.4026702726323665, + "objective/train/value_std": 0.05291748046875, + "objective/train/weight_avg": 0.9999440908432007, + "objective/train/weighted_lm_loss": 2.91186785697937, + "objective/train/weights_max": 1.0779775381088257, + "objective/train/weights_min": 0.9076849222183228, + "theoretical_loss": 3.5740426402944365, + "tokens_seen": 1250099200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006272873194221509, + "loss": 1.5518, + "theoretical_loss": 3.5738203974865224, + "tokens_seen": 1250951168 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0028789720963686705, + "objective/train/docs_used": 710221, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.7089428901672363, + "objective/train/original_loss": 3.7089431285858154, + "objective/train/theoretical_loss": 3.5736154221702483, + "objective/train/tokens_used": 1272197600, + "objective/train/value_avg": -0.0242767333984375, + "objective/train/value_loss": 0.005146878305822611, + "objective/train/value_max": -0.0006165504455566406, + "objective/train/value_min": -0.98291015625, + "objective/train/value_reward_corr": 0.7774934368343929, + "objective/train/value_std": 0.0889892578125, + "objective/train/weight_avg": 1.000313401222229, + "objective/train/weighted_lm_loss": 3.7102882862091064, + "objective/train/weights_max": 1.063672661781311, + "objective/train/weights_min": 0.9063700437545776, + "theoretical_loss": 3.5736154221702483, + "tokens_seen": 1251737600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006269662921348315, + "loss": 1.5465, + "theoretical_loss": 3.5735471336925984, + "tokens_seen": 1251999744 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006266452648475121, + "loss": 1.5495, + "theoretical_loss": 3.5732741626878743, + "tokens_seen": 1253048320 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0056799957528710365, + "objective/train/docs_used": 710893, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.953261613845825, + "objective/train/original_loss": 2.9532620906829834, + "objective/train/theoretical_loss": 3.573188919205256, + "objective/train/tokens_used": 1273836000, + "objective/train/value_avg": -0.01453399658203125, + "objective/train/value_loss": 0.002610684372484684, + "objective/train/value_max": -0.0006289482116699219, + "objective/train/value_min": -0.5361328125, + "objective/train/value_reward_corr": 0.352052995576823, + "objective/train/value_std": 0.0192718505859375, + "objective/train/weight_avg": 1.0005807876586914, + "objective/train/weighted_lm_loss": 2.9547812938690186, + "objective/train/weights_max": 1.0291643142700195, + "objective/train/weights_min": 0.9077855348587036, + "theoretical_loss": 3.573188919205256, + "tokens_seen": 1253376000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006263242375601926, + "loss": 1.5303, + "theoretical_loss": 3.5730014839140223, + "tokens_seen": 1254096896 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.003361576935276389, + "objective/train/docs_used": 712142, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5169777870178223, + "objective/train/original_loss": 2.5169777870178223, + "objective/train/theoretical_loss": 3.5727631292697843, + "objective/train/tokens_used": 1275474400, + "objective/train/value_avg": -0.01488494873046875, + "objective/train/value_loss": 0.0033646603114902973, + "objective/train/value_max": -0.0005617141723632812, + "objective/train/value_min": -0.90673828125, + "objective/train/value_reward_corr": 0.5186559293280198, + "objective/train/value_std": 0.02764892578125, + "objective/train/weight_avg": 1.0003526210784912, + "objective/train/weighted_lm_loss": 2.5178937911987305, + "objective/train/weights_max": 1.0462313890457153, + "objective/train/weights_min": 0.9089709520339966, + "theoretical_loss": 3.5727631292697843, + "tokens_seen": 1255014400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006260032102728732, + "loss": 1.5874, + "theoretical_loss": 3.5727290968142444, + "tokens_seen": 1255145472 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006256821829855538, + "loss": 1.5586, + "theoretical_loss": 3.572457000833267, + "tokens_seen": 1256194048 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0023293071426451206, + "objective/train/docs_used": 712613, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3992807865142822, + "objective/train/original_loss": 3.399280071258545, + "objective/train/theoretical_loss": 3.572338050243272, + "objective/train/tokens_used": 1277112800, + "objective/train/value_avg": -0.017303466796875, + "objective/train/value_loss": 0.006881974637508392, + "objective/train/value_max": -0.0003597736358642578, + "objective/train/value_min": -0.921875, + "objective/train/value_reward_corr": 0.48260990084687355, + "objective/train/value_std": 0.04901123046875, + "objective/train/weight_avg": 1.0002667903900146, + "objective/train/weighted_lm_loss": 3.4000210762023926, + "objective/train/weights_max": 1.0881576538085938, + "objective/train/weights_min": 0.9122130274772644, + "theoretical_loss": 3.572338050243272, + "tokens_seen": 1256652800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006253611556982344, + "loss": 1.5322, + "theoretical_loss": 3.5721851954173376, + "tokens_seen": 1257242624 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.00573922973126173, + "objective/train/docs_used": 713934, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.845794677734375, + "objective/train/original_loss": 2.845795154571533, + "objective/train/theoretical_loss": 3.571913680014217, + "objective/train/tokens_used": 1278751200, + "objective/train/value_avg": -0.0106353759765625, + "objective/train/value_loss": 0.00013185795978643, + "objective/train/value_max": -0.0004355907440185547, + "objective/train/value_min": -0.1622314453125, + "objective/train/value_reward_corr": 0.4650832460088728, + "objective/train/value_std": 0.008331298828125, + "objective/train/weight_avg": 1.0005745887756348, + "objective/train/weighted_lm_loss": 2.8477020263671875, + "objective/train/weights_max": 1.0160545110702515, + "objective/train/weights_min": 0.9885057210922241, + "theoretical_loss": 3.571913680014217, + "tokens_seen": 1258291200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006250401284109149, + "loss": 1.5642, + "theoretical_loss": 3.571913680014217, + "tokens_seen": 1258291200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006247191011235955, + "loss": 1.5735, + "theoretical_loss": 3.5716424540731735, + "tokens_seen": 1259339776 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": -0.0007137022912502289, + "objective/train/docs_used": 714588, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1081035137176514, + "objective/train/original_loss": 3.108103036880493, + "objective/train/theoretical_loss": 3.5714900164801286, + "objective/train/tokens_used": 1280389600, + "objective/train/value_avg": -0.0260009765625, + "objective/train/value_loss": 0.006139517296105623, + "objective/train/value_max": -0.0009474754333496094, + "objective/train/value_min": -0.9306640625, + "objective/train/value_reward_corr": 0.4719846110899416, + "objective/train/value_std": 0.0367431640625, + "objective/train/weight_avg": 0.9999589323997498, + "objective/train/weighted_lm_loss": 3.10750412940979, + "objective/train/weights_max": 1.0428290367126465, + "objective/train/weights_min": 0.9083887338638306, + "theoretical_loss": 3.5714900164801286, + "tokens_seen": 1259929600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006243980738362761, + "loss": 1.5756, + "theoretical_loss": 3.571371517044981, + "tokens_seen": 1260388352 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006240770465489566, + "loss": 1.5494, + "theoretical_loss": 3.571100868381909, + "tokens_seen": 1261436928 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.00581497373059392, + "objective/train/docs_used": 715954, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7732784748077393, + "objective/train/original_loss": 2.77327823638916, + "objective/train/theoretical_loss": 3.5710670575474763, + "objective/train/tokens_used": 1282028000, + "objective/train/value_avg": -0.01134490966796875, + "objective/train/value_loss": 0.00039953069062903523, + "objective/train/value_max": -0.0005483627319335938, + "objective/train/value_min": -0.277099609375, + "objective/train/value_reward_corr": 0.297684978182322, + "objective/train/value_std": 0.0117645263671875, + "objective/train/weight_avg": 1.000583529472351, + "objective/train/weighted_lm_loss": 2.7749037742614746, + "objective/train/weights_max": 1.0276082754135132, + "objective/train/weights_min": 0.9626301527023315, + "theoretical_loss": 3.5710670575474763, + "tokens_seen": 1261568000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006237560192616373, + "loss": 1.5423, + "theoretical_loss": 3.5708305075377207, + "tokens_seen": 1262485504 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.005322805140167475, + "objective/train/docs_used": 716702, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.683406114578247, + "objective/train/original_loss": 2.683405876159668, + "objective/train/theoretical_loss": 3.570644801131639, + "objective/train/tokens_used": 1283666400, + "objective/train/value_avg": -0.01419830322265625, + "objective/train/value_loss": 0.0024749194271862507, + "objective/train/value_max": -0.0006461143493652344, + "objective/train/value_min": -0.460693359375, + "objective/train/value_reward_corr": 0.27793470641415147, + "objective/train/value_std": 0.016326904296875, + "objective/train/weight_avg": 1.0005443096160889, + "objective/train/weighted_lm_loss": 2.684680938720703, + "objective/train/weights_max": 1.0353537797927856, + "objective/train/weights_min": 0.909543514251709, + "theoretical_loss": 3.570644801131639, + "tokens_seen": 1263206400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006234349919743179, + "loss": 1.5199, + "theoretical_loss": 3.5705604339676666, + "tokens_seen": 1263534080 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006231139646869985, + "loss": 1.5325, + "theoretical_loss": 3.57029064712848, + "tokens_seen": 1264582656 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.006513949949294329, + "objective/train/docs_used": 717941, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1867563724517822, + "objective/train/original_loss": 3.1867563724517822, + "objective/train/theoretical_loss": 3.570223245156858, + "objective/train/tokens_used": 1285304800, + "objective/train/value_avg": -0.01200103759765625, + "objective/train/value_loss": 0.0006638810154981911, + "objective/train/value_max": -0.0005211830139160156, + "objective/train/value_min": -0.326904296875, + "objective/train/value_reward_corr": 0.20354033935326526, + "objective/train/value_std": 0.01432037353515625, + "objective/train/weight_avg": 1.000654697418213, + "objective/train/weighted_lm_loss": 3.1884493827819824, + "objective/train/weights_max": 1.0266097784042358, + "objective/train/weights_min": 0.949505090713501, + "theoretical_loss": 3.570223245156858, + "tokens_seen": 1264844800 + }, + { + "epoch": 0.38, + "learning_rate": 0.000622792937399679, + "loss": 1.5658, + "theoretical_loss": 3.5700211464783687, + "tokens_seen": 1265631232 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.005546371452510357, + "objective/train/docs_used": 718481, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.189340114593506, + "objective/train/original_loss": 3.189340114593506, + "objective/train/theoretical_loss": 3.5698023875561855, + "objective/train/tokens_used": 1286943200, + "objective/train/value_avg": -0.0162506103515625, + "objective/train/value_loss": 0.0030232477001845837, + "objective/train/value_max": -0.0005745887756347656, + "objective/train/value_min": -0.78466796875, + "objective/train/value_reward_corr": 0.5412249640335004, + "objective/train/value_std": 0.04150390625, + "objective/train/weight_avg": 1.0005695819854736, + "objective/train/weighted_lm_loss": 3.1911797523498535, + "objective/train/weights_max": 1.0526536703109741, + "objective/train/weights_min": 0.9207553267478943, + "theoretical_loss": 3.5698023875561855, + "tokens_seen": 1266483200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006224719101123596, + "loss": 1.5666, + "theoretical_loss": 3.5697519314770148, + "tokens_seen": 1266679808 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006221508828250402, + "loss": 1.5714, + "theoretical_loss": 3.5694830015855636, + "tokens_seen": 1267728384 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.010997762903571129, + "objective/train/docs_used": 719716, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.050309896469116, + "objective/train/original_loss": 3.050309896469116, + "objective/train/theoretical_loss": 3.569382226271438, + "objective/train/tokens_used": 1288581600, + "objective/train/value_avg": -0.050384521484375, + "objective/train/value_loss": 0.014732768759131432, + "objective/train/value_max": -0.0009775161743164062, + "objective/train/value_min": -0.95703125, + "objective/train/value_reward_corr": 0.399338028612395, + "objective/train/value_std": 0.10137939453125, + "objective/train/weight_avg": 1.0011733770370483, + "objective/train/weighted_lm_loss": 3.053375005722046, + "objective/train/weights_max": 1.0940364599227905, + "objective/train/weights_min": 0.9090481996536255, + "theoretical_loss": 3.569382226271438, + "tokens_seen": 1268121600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006218298555377207, + "loss": 1.5291, + "theoretical_loss": 3.569214356266625, + "tokens_seen": 1268776960 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0022937681060284376, + "objective/train/docs_used": 720440, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5129289627075195, + "objective/train/original_loss": 3.5129294395446777, + "objective/train/theoretical_loss": 3.5689627592531465, + "objective/train/tokens_used": 1290220000, + "objective/train/value_avg": -0.045166015625, + "objective/train/value_loss": 0.015243802219629288, + "objective/train/value_max": -0.0008330345153808594, + "objective/train/value_min": -0.986328125, + "objective/train/value_reward_corr": 0.6058789895849609, + "objective/train/value_std": 0.09039306640625, + "objective/train/weight_avg": 1.0003043413162231, + "objective/train/weighted_lm_loss": 3.513439416885376, + "objective/train/weights_max": 1.082072138786316, + "objective/train/weights_min": 0.9076531529426575, + "theoretical_loss": 3.5689627592531465, + "tokens_seen": 1269760000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006215088282504013, + "loss": 1.5735, + "theoretical_loss": 3.5689459949842623, + "tokens_seen": 1269825536 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006211878009630819, + "loss": 1.5614, + "theoretical_loss": 3.5686779172039906, + "tokens_seen": 1270874112 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.025514157488942146, + "objective/train/docs_used": 721295, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0937066078186035, + "objective/train/original_loss": 3.0937066078186035, + "objective/train/theoretical_loss": 3.568543984460508, + "objective/train/tokens_used": 1291858400, + "objective/train/value_avg": -0.0372314453125, + "objective/train/value_loss": 0.015912124887108803, + "objective/train/value_max": -0.00043892860412597656, + "objective/train/value_min": -0.935546875, + "objective/train/value_reward_corr": 0.9276541765261692, + "objective/train/value_std": 0.106201171875, + "objective/train/weight_avg": 0.9975267052650452, + "objective/train/weighted_lm_loss": 3.0896050930023193, + "objective/train/weights_max": 1.0308668613433838, + "objective/train/weights_min": 0.9165350794792175, + "theoretical_loss": 3.568543984460508, + "tokens_seen": 1271398400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006208667736757625, + "loss": 1.5286, + "theoretical_loss": 3.5684101223927702, + "tokens_seen": 1271922688 + }, + { + "epoch": 0.39, + "learning_rate": 0.000620545746388443, + "loss": 1.5619, + "theoretical_loss": 3.568142610019003, + "tokens_seen": 1272971264 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.00019397764117456973, + "objective/train/docs_used": 722001, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9364047050476074, + "objective/train/original_loss": 2.9364047050476074, + "objective/train/theoretical_loss": 3.5681258998613385, + "objective/train/tokens_used": 1293496800, + "objective/train/value_avg": -0.0291748046875, + "objective/train/value_loss": 0.005107827018946409, + "objective/train/value_max": -0.000743865966796875, + "objective/train/value_min": -0.95751953125, + "objective/train/value_reward_corr": 0.6814899179498707, + "objective/train/value_std": 0.05218505859375, + "objective/train/weight_avg": 1.000005841255188, + "objective/train/weighted_lm_loss": 2.935725212097168, + "objective/train/weights_max": 1.0904104709625244, + "objective/train/weights_min": 0.9096202850341797, + "theoretical_loss": 3.5681258998613385, + "tokens_seen": 1273036800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006202247191011236, + "loss": 1.5379, + "theoretical_loss": 3.567875379552525, + "tokens_seen": 1274019840 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.005766416434198618, + "objective/train/docs_used": 723183, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.154763698577881, + "objective/train/original_loss": 3.1547632217407227, + "objective/train/theoretical_loss": 3.5677085034320273, + "objective/train/tokens_used": 1295135200, + "objective/train/value_avg": -0.014434814453125, + "objective/train/value_loss": 0.001521629048511386, + "objective/train/value_max": -0.0004973411560058594, + "objective/train/value_min": -0.28564453125, + "objective/train/value_reward_corr": 0.5752860274920543, + "objective/train/value_std": 0.0245361328125, + "objective/train/weight_avg": 1.0005841255187988, + "objective/train/weighted_lm_loss": 3.156256675720215, + "objective/train/weights_max": 1.0222896337509155, + "objective/train/weights_min": 0.9247938394546509, + "theoretical_loss": 3.5677085034320273, + "tokens_seen": 1274675200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006199036918138043, + "loss": 1.5898, + "theoretical_loss": 3.567608430464604, + "tokens_seen": 1275068416 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006195826645264849, + "loss": 1.543, + "theoretical_loss": 3.567341762227932, + "tokens_seen": 1276116992 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.001852215500548482, + "objective/train/docs_used": 723876, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9814891815185547, + "objective/train/original_loss": 2.981489658355713, + "objective/train/theoretical_loss": 3.567291793157486, + "objective/train/tokens_used": 1296773600, + "objective/train/value_avg": -0.012939453125, + "objective/train/value_loss": 0.0018953698454424739, + "objective/train/value_max": -0.0005745887756347656, + "objective/train/value_min": -0.428466796875, + "objective/train/value_reward_corr": 0.27754197795591046, + "objective/train/value_std": 0.0188140869140625, + "objective/train/weight_avg": 1.0001946687698364, + "objective/train/weighted_lm_loss": 2.9828195571899414, + "objective/train/weights_max": 1.0429232120513916, + "objective/train/weights_min": 0.9491299986839294, + "theoretical_loss": 3.567291793157486, + "tokens_seen": 1276313600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006192616372391653, + "loss": 1.5721, + "theoretical_loss": 3.567075374316623, + "tokens_seen": 1277165568 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.002112989081069827, + "objective/train/docs_used": 725220, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9559223651885986, + "objective/train/original_loss": 2.9559223651885986, + "objective/train/theoretical_loss": 3.566875767031105, + "objective/train/tokens_used": 1298412000, + "objective/train/value_avg": -0.01512908935546875, + "objective/train/value_loss": 0.0016433697892352939, + "objective/train/value_max": -0.0004935264587402344, + "objective/train/value_min": -0.411865234375, + "objective/train/value_reward_corr": 0.501197278401125, + "objective/train/value_std": 0.020904541015625, + "objective/train/weight_avg": 1.000219464302063, + "objective/train/weighted_lm_loss": 2.9569079875946045, + "objective/train/weights_max": 1.0419416427612305, + "objective/train/weights_min": 0.942220151424408, + "theoretical_loss": 3.566875767031105, + "tokens_seen": 1277952000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006189406099518459, + "loss": 1.5827, + "theoretical_loss": 3.5668092662062048, + "tokens_seen": 1278214144 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006186195826645265, + "loss": 1.5435, + "theoretical_loss": 3.566543437373617, + "tokens_seen": 1279262720 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.008426793850958347, + "objective/train/docs_used": 725568, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9621877670288086, + "objective/train/original_loss": 2.962188243865967, + "objective/train/theoretical_loss": 3.5664604230547052, + "objective/train/tokens_used": 1300050400, + "objective/train/value_avg": -0.0127716064453125, + "objective/train/value_loss": 0.0013100688811391592, + "objective/train/value_max": -0.00070953369140625, + "objective/train/value_min": -0.572265625, + "objective/train/value_reward_corr": 0.36529383676677446, + "objective/train/value_std": 0.0189971923828125, + "objective/train/weight_avg": 1.0008491277694702, + "objective/train/weighted_lm_loss": 2.964726448059082, + "objective/train/weights_max": 1.0340814590454102, + "objective/train/weights_min": 0.9180997014045715, + "theoretical_loss": 3.5664604230547052, + "tokens_seen": 1279590400 + }, + { + "epoch": 0.39, + "learning_rate": 0.000618298555377207, + "loss": 1.5798, + "theoretical_loss": 3.5662778872972036, + "tokens_seen": 1280311296 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.006205847021192312, + "objective/train/docs_used": 726917, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4691078662872314, + "objective/train/original_loss": 3.4691076278686523, + "objective/train/theoretical_loss": 3.5660457592384924, + "objective/train/tokens_used": 1301688800, + "objective/train/value_avg": -0.01329803466796875, + "objective/train/value_loss": 0.0009621668141335249, + "objective/train/value_max": -0.0003459453582763672, + "objective/train/value_min": -0.83056640625, + "objective/train/value_reward_corr": 0.4989929754429993, + "objective/train/value_std": 0.026824951171875, + "objective/train/weight_avg": 1.0006253719329834, + "objective/train/weighted_lm_loss": 3.4715945720672607, + "objective/train/weights_max": 1.0671260356903076, + "objective/train/weights_min": 0.9175925254821777, + "theoretical_loss": 3.5660457592384924, + "tokens_seen": 1281228800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006179775280898876, + "loss": 1.5932, + "theoretical_loss": 3.56601261545671, + "tokens_seen": 1281359872 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006176565008025682, + "loss": 1.5587, + "theoretical_loss": 3.565747621333277, + "tokens_seen": 1282408448 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0071050385013222694, + "objective/train/docs_used": 727506, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.343418598175049, + "objective/train/original_loss": 3.3434183597564697, + "objective/train/theoretical_loss": 3.5656317736010132, + "objective/train/tokens_used": 1303327200, + "objective/train/value_avg": -0.0147552490234375, + "objective/train/value_loss": 0.002027018228545785, + "objective/train/value_max": -0.00030541419982910156, + "objective/train/value_min": -0.82080078125, + "objective/train/value_reward_corr": 0.4307900653376698, + "objective/train/value_std": 0.032501220703125, + "objective/train/weight_avg": 1.000720500946045, + "objective/train/weighted_lm_loss": 3.3458359241485596, + "objective/train/weights_max": 1.0576573610305786, + "objective/train/weights_min": 0.9066044688224792, + "theoretical_loss": 3.5656317736010132, + "tokens_seen": 1282867200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006173354735152488, + "loss": 1.5437, + "theoretical_loss": 3.565482904409436, + "tokens_seen": 1283457024 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.007978660985827446, + "objective/train/docs_used": 728764, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.363269805908203, + "objective/train/original_loss": 3.363269329071045, + "objective/train/theoretical_loss": 3.5652184641691047, + "objective/train/tokens_used": 1304965600, + "objective/train/value_avg": -0.0225372314453125, + "objective/train/value_loss": 0.003909376449882984, + "objective/train/value_max": -0.00075531005859375, + "objective/train/value_min": -0.96240234375, + "objective/train/value_reward_corr": 0.5778593838555249, + "objective/train/value_std": 0.044586181640625, + "objective/train/weight_avg": 1.0008171796798706, + "objective/train/weighted_lm_loss": 3.3661880493164062, + "objective/train/weights_max": 1.061923861503601, + "objective/train/weights_min": 0.9092108011245728, + "theoretical_loss": 3.5652184641691047, + "tokens_seen": 1284505600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006170144462279293, + "loss": 1.5713, + "theoretical_loss": 3.5652184641691047, + "tokens_seen": 1284505600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006166934189406099, + "loss": 1.5393, + "theoretical_loss": 3.5649543000975825, + "tokens_seen": 1285554176 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.009498201310634613, + "objective/train/docs_used": 729512, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6639723777770996, + "objective/train/original_loss": 2.6639723777770996, + "objective/train/theoretical_loss": 3.564805828977856, + "objective/train/tokens_used": 1306604000, + "objective/train/value_avg": -0.0185546875, + "objective/train/value_loss": 0.002410725923255086, + "objective/train/value_max": -0.0006461143493652344, + "objective/train/value_min": -0.94189453125, + "objective/train/value_reward_corr": 0.7495609517491428, + "objective/train/value_std": 0.052032470703125, + "objective/train/weight_avg": 1.0009617805480957, + "objective/train/weighted_lm_loss": 2.665766716003418, + "objective/train/weights_max": 1.0685698986053467, + "objective/train/weights_min": 0.9103714227676392, + "theoretical_loss": 3.564805828977856, + "tokens_seen": 1286144000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006163723916532905, + "loss": 1.5835, + "theoretical_loss": 3.564690411681543, + "tokens_seen": 1286602752 + }, + { + "epoch": 0.39, + "learning_rate": 0.000616051364365971, + "loss": 1.5739, + "theoretical_loss": 3.564426798409034, + "tokens_seen": 1287651328 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.016187189146876335, + "objective/train/docs_used": 730476, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2150604724884033, + "objective/train/original_loss": 3.215059995651245, + "objective/train/theoretical_loss": 3.5643938660705556, + "objective/train/tokens_used": 1308242400, + "objective/train/value_avg": -0.0438232421875, + "objective/train/value_loss": 0.0078775929287076, + "objective/train/value_max": -0.0005507469177246094, + "objective/train/value_min": -0.9736328125, + "objective/train/value_reward_corr": 0.6570164918911123, + "objective/train/value_std": 0.09429931640625, + "objective/train/weight_avg": 1.0016579627990723, + "objective/train/weighted_lm_loss": 3.2194149494171143, + "objective/train/weights_max": 1.0840376615524292, + "objective/train/weights_min": 0.9137952327728271, + "theoretical_loss": 3.5643938660705556, + "tokens_seen": 1287782400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006157303370786517, + "loss": 1.5586, + "theoretical_loss": 3.5641634597694685, + "tokens_seen": 1288699904 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.004058612044900656, + "objective/train/docs_used": 731120, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9023966789245605, + "objective/train/original_loss": 2.9023966789245605, + "objective/train/theoretical_loss": 3.563982573498654, + "objective/train/tokens_used": 1309880800, + "objective/train/value_avg": -0.0313720703125, + "objective/train/value_loss": 0.0043548657558858395, + "objective/train/value_max": -0.0006337165832519531, + "objective/train/value_min": -0.935546875, + "objective/train/value_reward_corr": 0.8610209468067951, + "objective/train/value_std": 0.08111572265625, + "objective/train/weight_avg": 0.9996156692504883, + "objective/train/weighted_lm_loss": 2.9014267921447754, + "objective/train/weights_max": 1.0465449094772339, + "objective/train/weights_min": 0.9068897366523743, + "theoretical_loss": 3.563982573498654, + "tokens_seen": 1289420800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006154093097913323, + "loss": 1.5592, + "theoretical_loss": 3.5639003952536212, + "tokens_seen": 1289748480 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006150882825040129, + "loss": 1.558, + "theoretical_loss": 3.563637604353625, + "tokens_seen": 1290797056 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.007292288821190596, + "objective/train/docs_used": 732356, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.357142210006714, + "objective/train/original_loss": 3.3571419715881348, + "objective/train/theoretical_loss": 3.5635719493217155, + "objective/train/tokens_used": 1311519200, + "objective/train/value_avg": -0.0152435302734375, + "objective/train/value_loss": 0.0017515128711238503, + "objective/train/value_max": -0.0006265640258789062, + "objective/train/value_min": -0.8095703125, + "objective/train/value_reward_corr": 0.47749962834320886, + "objective/train/value_std": 0.02508544921875, + "objective/train/weight_avg": 1.0007379055023193, + "objective/train/weighted_lm_loss": 3.3594696521759033, + "objective/train/weights_max": 1.0443859100341797, + "objective/train/weights_min": 0.9210035800933838, + "theoretical_loss": 3.5635719493217155, + "tokens_seen": 1291059200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006147672552166934, + "loss": 1.5821, + "theoretical_loss": 3.563375086562964, + "tokens_seen": 1291845632 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.007593745365738869, + "objective/train/docs_used": 733109, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9253463745117188, + "objective/train/original_loss": 2.925346851348877, + "objective/train/theoretical_loss": 3.563161991607373, + "objective/train/tokens_used": 1313157600, + "objective/train/value_avg": -0.0175628662109375, + "objective/train/value_loss": 0.004392481874674559, + "objective/train/value_max": -0.0007381439208984375, + "objective/train/value_min": -0.31298828125, + "objective/train/value_reward_corr": 0.6050823320229112, + "objective/train/value_std": 0.020416259765625, + "objective/train/weight_avg": 0.9992623925209045, + "objective/train/weighted_lm_loss": 2.924592971801758, + "objective/train/weights_max": 1.025255799293518, + "objective/train/weights_min": 0.9347873330116272, + "theoretical_loss": 3.563161991607373, + "tokens_seen": 1292697600 + }, + { + "epoch": 0.39, + "learning_rate": 0.000614446227929374, + "loss": 1.5782, + "theoretical_loss": 3.563112841376472, + "tokens_seen": 1292894208 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006141252006420546, + "loss": 1.5641, + "theoretical_loss": 3.562850868290324, + "tokens_seen": 1293942784 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.005946251563727856, + "objective/train/docs_used": 734261, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.315223455429077, + "objective/train/original_loss": 3.315223217010498, + "objective/train/theoretical_loss": 3.5627526984312885, + "objective/train/tokens_used": 1314796000, + "objective/train/value_avg": -0.01227569580078125, + "objective/train/value_loss": 0.0009412716026417911, + "objective/train/value_max": -0.0004839897155761719, + "objective/train/value_min": -0.5244140625, + "objective/train/value_reward_corr": 0.5393200281295741, + "objective/train/value_std": 0.0220947265625, + "objective/train/weight_avg": 1.0005992650985718, + "objective/train/weighted_lm_loss": 3.317059278488159, + "objective/train/weights_max": 1.0452470779418945, + "objective/train/weights_min": 0.913921594619751, + "theoretical_loss": 3.5627526984312885, + "tokens_seen": 1294336000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006138041733547351, + "loss": 1.5897, + "theoretical_loss": 3.5625891668020353, + "tokens_seen": 1294991360 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.002848101081326604, + "objective/train/docs_used": 734748, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5119593143463135, + "objective/train/original_loss": 3.5119590759277344, + "objective/train/theoretical_loss": 3.562344067877105, + "objective/train/tokens_used": 1316434400, + "objective/train/value_avg": -0.013519287109375, + "objective/train/value_loss": 0.0029660845175385475, + "objective/train/value_max": -0.00044417381286621094, + "objective/train/value_min": -0.49755859375, + "objective/train/value_reward_corr": 0.21929730848756304, + "objective/train/value_std": 0.0195159912109375, + "objective/train/weight_avg": 1.0002994537353516, + "objective/train/weighted_lm_loss": 3.512956380844116, + "objective/train/weights_max": 1.046648621559143, + "objective/train/weights_min": 0.9238758683204651, + "theoretical_loss": 3.562344067877105, + "tokens_seen": 1295974400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006134831460674157, + "loss": 1.5531, + "theoretical_loss": 3.5623277364104537, + "tokens_seen": 1296039936 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006131621187800963, + "loss": 1.5637, + "theoretical_loss": 3.562066576615756, + "tokens_seen": 1297088512 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 8.506722952006385e-05, + "objective/train/docs_used": 735602, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0779378414154053, + "objective/train/original_loss": 3.077937602996826, + "objective/train/theoretical_loss": 3.5619360980364068, + "objective/train/tokens_used": 1318072800, + "objective/train/value_avg": -0.015869140625, + "objective/train/value_loss": 0.002451573731377721, + "objective/train/value_max": -0.0006880760192871094, + "objective/train/value_min": -0.420166015625, + "objective/train/value_reward_corr": 0.28134678746408004, + "objective/train/value_std": 0.019012451171875, + "objective/train/weight_avg": 1.0000207424163818, + "objective/train/weighted_lm_loss": 3.0797927379608154, + "objective/train/weights_max": 1.037746548652649, + "objective/train/weights_min": 0.9439131021499634, + "theoretical_loss": 3.5619360980364068, + "tokens_seen": 1297612800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006128410914927769, + "loss": 1.5716, + "theoretical_loss": 3.5618056869194454, + "tokens_seen": 1298137088 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006125200642054574, + "loss": 1.599, + "theoretical_loss": 3.561545066824343, + "tokens_seen": 1299185664 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.003085457719862461, + "objective/train/docs_used": 736089, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1629769802093506, + "objective/train/original_loss": 3.1629767417907715, + "objective/train/theoretical_loss": 3.5615287870086756, + "objective/train/tokens_used": 1319711200, + "objective/train/value_avg": -0.01526641845703125, + "objective/train/value_loss": 0.0019344487227499485, + "objective/train/value_max": -0.0006337165832519531, + "objective/train/value_min": -0.44482421875, + "objective/train/value_reward_corr": 0.31933427033278966, + "objective/train/value_std": 0.016998291015625, + "objective/train/weight_avg": 1.000318169593811, + "objective/train/weighted_lm_loss": 3.1640372276306152, + "objective/train/weights_max": 1.034044623374939, + "objective/train/weights_min": 0.9373178482055664, + "theoretical_loss": 3.5615287870086756, + "tokens_seen": 1299251200 + }, + { + "epoch": 0.39, + "learning_rate": 0.000612199036918138, + "loss": 1.5725, + "theoretical_loss": 3.561284715834587, + "tokens_seen": 1300234240 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0019003687193617225, + "objective/train/docs_used": 737292, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3895184993743896, + "objective/train/original_loss": 3.3895182609558105, + "objective/train/theoretical_loss": 3.5611221329012466, + "objective/train/tokens_used": 1321349600, + "objective/train/value_avg": -0.0181884765625, + "objective/train/value_loss": 0.0037235829513520002, + "objective/train/value_max": -0.0005998611450195312, + "objective/train/value_min": -0.75390625, + "objective/train/value_reward_corr": 0.6803229551578224, + "objective/train/value_std": 0.038482666015625, + "objective/train/weight_avg": 1.0002083778381348, + "objective/train/weighted_lm_loss": 3.3895602226257324, + "objective/train/weights_max": 1.0378098487854004, + "objective/train/weights_min": 0.9140089750289917, + "theoretical_loss": 3.5611221329012466, + "tokens_seen": 1300889600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006118780096308187, + "loss": 1.5962, + "theoretical_loss": 3.5610246334556255, + "tokens_seen": 1301282816 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006115569823434993, + "loss": 1.5493, + "theoretical_loss": 3.5607648191942145, + "tokens_seen": 1302331392 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.010936222970485687, + "objective/train/docs_used": 737760, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.942448377609253, + "objective/train/original_loss": 2.942448139190674, + "objective/train/theoretical_loss": 3.5607161338292683, + "objective/train/tokens_used": 1322988000, + "objective/train/value_avg": -0.0251007080078125, + "objective/train/value_loss": 0.015529835596680641, + "objective/train/value_max": -0.0005092620849609375, + "objective/train/value_min": -0.97705078125, + "objective/train/value_reward_corr": 0.44906570216751585, + "objective/train/value_std": 0.05267333984375, + "objective/train/weight_avg": 0.998982310295105, + "objective/train/weighted_lm_loss": 2.9383902549743652, + "objective/train/weights_max": 1.0713562965393066, + "objective/train/weights_min": 0.9081451296806335, + "theoretical_loss": 3.5607161338292683, + "tokens_seen": 1302528000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006112359550561798, + "loss": 1.6189, + "theoretical_loss": 3.56050527255841, + "tokens_seen": 1303379968 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.0007271516369655728, + "objective/train/docs_used": 739145, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.130463123321533, + "objective/train/original_loss": 3.130463123321533, + "objective/train/theoretical_loss": 3.5603107879156584, + "objective/train/tokens_used": 1324626400, + "objective/train/value_avg": -0.0197601318359375, + "objective/train/value_loss": 0.0035742244217544794, + "objective/train/value_max": -0.0007152557373046875, + "objective/train/value_min": -0.368408203125, + "objective/train/value_reward_corr": 0.46816694990020813, + "objective/train/value_std": 0.028839111328125, + "objective/train/weight_avg": 0.9999448657035828, + "objective/train/weighted_lm_loss": 3.1309337615966797, + "objective/train/weights_max": 1.0325936079025269, + "objective/train/weights_min": 0.9311079382896423, + "theoretical_loss": 3.5603107879156584, + "tokens_seen": 1304166400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006109149277688604, + "loss": 1.5955, + "theoretical_loss": 3.560245993057567, + "tokens_seen": 1304428544 + }, + { + "epoch": 0.4, + "learning_rate": 0.000610593900481541, + "loss": 1.5721, + "theoretical_loss": 3.5599869802023325, + "tokens_seen": 1305477120 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": -0.00419824430719018, + "objective/train/docs_used": 739729, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.043073892593384, + "objective/train/original_loss": 3.043074131011963, + "objective/train/theoretical_loss": 3.5599060932910644, + "objective/train/tokens_used": 1326264800, + "objective/train/value_avg": -0.019744873046875, + "objective/train/value_loss": 0.006422300357371569, + "objective/train/value_max": -0.0006589889526367188, + "objective/train/value_min": -0.990234375, + "objective/train/value_reward_corr": 0.5568798796300672, + "objective/train/value_std": 0.04541015625, + "objective/train/weight_avg": 0.9996117949485779, + "objective/train/weighted_lm_loss": 3.041696786880493, + "objective/train/weights_max": 1.0768229961395264, + "objective/train/weights_min": 0.9098062515258789, + "theoretical_loss": 3.5599060932910644, + "tokens_seen": 1305804800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006102728731942215, + "loss": 1.5709, + "theoretical_loss": 3.5597282335046425, + "tokens_seen": 1306525696 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": -0.0009990237886086106, + "objective/train/docs_used": 740313, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.812108039855957, + "objective/train/original_loss": 2.812107801437378, + "objective/train/theoretical_loss": 3.5595020480938198, + "objective/train/tokens_used": 1327903200, + "objective/train/value_avg": -0.0198211669921875, + "objective/train/value_loss": 0.006268426775932312, + "objective/train/value_max": -0.0004992485046386719, + "objective/train/value_min": -0.93359375, + "objective/train/value_reward_corr": 0.5150763707322533, + "objective/train/value_std": 0.035125732421875, + "objective/train/weight_avg": 0.9999308586120605, + "objective/train/weighted_lm_loss": 2.8112680912017822, + "objective/train/weights_max": 1.048080325126648, + "objective/train/weights_min": 0.9088049530982971, + "theoretical_loss": 3.5595020480938198, + "tokens_seen": 1307443200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006099518459069021, + "loss": 1.5779, + "theoretical_loss": 3.5594697524777175, + "tokens_seen": 1307574272 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006096308186195827, + "loss": 1.5449, + "theoretical_loss": 3.559211536636057, + "tokens_seen": 1308622848 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0010080800857394934, + "objective/train/docs_used": 741446, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.037750005722046, + "objective/train/original_loss": 3.037749767303467, + "objective/train/theoretical_loss": 3.5590986504699043, + "objective/train/tokens_used": 1329541600, + "objective/train/value_avg": -0.0135498046875, + "objective/train/value_loss": 0.0022422911133617163, + "objective/train/value_max": -0.0005679130554199219, + "objective/train/value_min": -0.564453125, + "objective/train/value_reward_corr": 0.41021218395122977, + "objective/train/value_std": 0.0179901123046875, + "objective/train/weight_avg": 1.0001118183135986, + "objective/train/weighted_lm_loss": 3.0383660793304443, + "objective/train/weights_max": 1.0482287406921387, + "objective/train/weights_min": 0.9223403334617615, + "theoretical_loss": 3.5590986504699043, + "tokens_seen": 1309081600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006093097913322633, + "loss": 1.5937, + "theoretical_loss": 3.5589535854954364, + "tokens_seen": 1309671424 + }, + { + "debugging/Self-BLEU-5": 0.23052991959830066, + "debugging/distinct-1-grams": 0.8218563883058809, + "debugging/distinct-2-grams": 0.9709930029461279, + "debugging/entropy-1-grams": 5.44784936020325, + "debugging/entropy-2-grams": 5.931401419421702, + "debugging/length": 343.125, + "debugging/num_segments": 8, + "debugging/raw_token_scores_avg": 0.018533706665039062, + "debugging/raw_token_scores_std": 0.06356416642665863, + "epoch": 0.4, + "objective/train/advantage_avg": 0.004957432858645916, + "objective/train/docs_used": 742066, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.581773281097412, + "objective/train/original_loss": 2.581773281097412, + "objective/train/theoretical_loss": 3.5586958985729016, + "objective/train/tokens_used": 1331180000, + "objective/train/value_avg": -0.02349853515625, + "objective/train/value_loss": 0.005791535135358572, + "objective/train/value_max": -0.0002715587615966797, + "objective/train/value_min": -0.73779296875, + "objective/train/value_reward_corr": 0.1833121785074609, + "objective/train/value_std": 0.054779052734375, + "objective/train/weight_avg": 1.000524640083313, + "objective/train/weighted_lm_loss": 2.5834603309631348, + "objective/train/weights_max": 1.0698943138122559, + "objective/train/weights_min": 0.9245852828025818, + "theoretical_loss": 3.5586958985729016, + "tokens_seen": 1310720000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006089887640449438, + "loss": 1.5815, + "theoretical_loss": 3.5586958985729016, + "tokens_seen": 1310720000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006086677367576244, + "loss": 1.6035, + "theoretical_loss": 3.558438475386766, + "tokens_seen": 1311768576 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.009005351923406124, + "objective/train/docs_used": 743478, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.221822500228882, + "objective/train/original_loss": 3.221822500228882, + "objective/train/theoretical_loss": 3.558293790563962, + "objective/train/tokens_used": 1332818400, + "objective/train/value_avg": -0.0179290771484375, + "objective/train/value_loss": 0.002912376308813691, + "objective/train/value_max": -0.0003447532653808594, + "objective/train/value_min": -0.9384765625, + "objective/train/value_reward_corr": 0.4632322363811398, + "objective/train/value_std": 0.036529541015625, + "objective/train/weight_avg": 1.0009149312973022, + "objective/train/weighted_lm_loss": 3.224909782409668, + "objective/train/weights_max": 1.0490314960479736, + "objective/train/weights_min": 0.9093881845474243, + "theoretical_loss": 3.558293790563962, + "tokens_seen": 1312358400 + }, + { + "epoch": 0.4, + "learning_rate": 0.000608346709470305, + "loss": 1.5649, + "theoretical_loss": 3.5581813154566038, + "tokens_seen": 1312817152 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006080256821829855, + "loss": 1.5371, + "theoretical_loss": 3.5579244183032483, + "tokens_seen": 1313865728 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.004103593062609434, + "objective/train/docs_used": 744202, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0528643131256104, + "objective/train/original_loss": 3.0528643131256104, + "objective/train/theoretical_loss": 3.5578923246117578, + "objective/train/tokens_used": 1334456800, + "objective/train/value_avg": -0.0167236328125, + "objective/train/value_loss": 0.004340441431850195, + "objective/train/value_max": -0.00047850608825683594, + "objective/train/value_min": -0.98681640625, + "objective/train/value_reward_corr": 0.5348994202497801, + "objective/train/value_std": 0.0494384765625, + "objective/train/weight_avg": 1.0004316568374634, + "objective/train/weighted_lm_loss": 3.0540943145751953, + "objective/train/weights_max": 1.0780798196792603, + "objective/train/weights_min": 0.9069098234176636, + "theoretical_loss": 3.5578923246117578, + "tokens_seen": 1313996800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006077046548956662, + "loss": 1.5436, + "theoretical_loss": 3.557667783448787, + "tokens_seen": 1314914304 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0005990409990772605, + "objective/train/docs_used": 745555, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.274261236190796, + "objective/train/original_loss": 3.274261236190796, + "objective/train/theoretical_loss": 3.5574914988924444, + "objective/train/tokens_used": 1336095200, + "objective/train/value_avg": -0.01363372802734375, + "objective/train/value_loss": 0.004436870105564594, + "objective/train/value_max": -0.0004458427429199219, + "objective/train/value_min": -0.5029296875, + "objective/train/value_reward_corr": 0.3335478814278259, + "objective/train/value_std": 0.017974853515625, + "objective/train/weight_avg": 1.0000817775726318, + "objective/train/weighted_lm_loss": 3.2739574909210205, + "objective/train/weights_max": 1.0302108526229858, + "objective/train/weights_min": 0.9099246859550476, + "theoretical_loss": 3.5574914988924444, + "tokens_seen": 1315635200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006073836276083468, + "loss": 1.5314, + "theoretical_loss": 3.5574114104165546, + "tokens_seen": 1315962880 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006070626003210274, + "loss": 1.5401, + "theoretical_loss": 3.557155298731134, + "tokens_seen": 1317011456 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.00491510983556509, + "objective/train/docs_used": 746236, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.997715711593628, + "objective/train/original_loss": 2.997715711593628, + "objective/train/theoretical_loss": 3.5570913115896228, + "objective/train/tokens_used": 1337733600, + "objective/train/value_avg": -0.01355743408203125, + "objective/train/value_loss": 0.001909430604428053, + "objective/train/value_max": -0.00046563148498535156, + "objective/train/value_min": -0.487548828125, + "objective/train/value_reward_corr": 0.48469253180888977, + "objective/train/value_std": 0.0215301513671875, + "objective/train/weight_avg": 1.0005009174346924, + "objective/train/weighted_lm_loss": 2.9992096424102783, + "objective/train/weights_max": 1.0287266969680786, + "objective/train/weights_min": 0.9181572794914246, + "theoretical_loss": 3.5570913115896228, + "tokens_seen": 1317273600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006067415730337079, + "loss": 1.5297, + "theoretical_loss": 3.5568994479183456, + "tokens_seen": 1318060032 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.003538486547768116, + "objective/train/docs_used": 747487, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.12113618850708, + "objective/train/original_loss": 3.12113618850708, + "objective/train/theoretical_loss": 3.5566917608942976, + "objective/train/tokens_used": 1339372000, + "objective/train/value_avg": -0.01617431640625, + "objective/train/value_loss": 0.003320917719975114, + "objective/train/value_max": -0.0007948875427246094, + "objective/train/value_min": -0.72998046875, + "objective/train/value_reward_corr": 0.5694826324190647, + "objective/train/value_std": 0.023956298828125, + "objective/train/weight_avg": 1.0003701448440552, + "objective/train/weighted_lm_loss": 3.1224279403686523, + "objective/train/weights_max": 1.0186136960983276, + "objective/train/weights_min": 0.9089659452438354, + "theoretical_loss": 3.5566917608942976, + "tokens_seen": 1318912000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006064205457463885, + "loss": 1.5473, + "theoretical_loss": 3.55664385750525, + "tokens_seen": 1319108608 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006060995184590691, + "loss": 1.5729, + "theoretical_loss": 3.556388527020138, + "tokens_seen": 1320157184 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.004094723146408796, + "objective/train/docs_used": 748180, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8146634101867676, + "objective/train/original_loss": 2.8146631717681885, + "objective/train/theoretical_loss": 3.5562928450048386, + "objective/train/tokens_used": 1341010400, + "objective/train/value_avg": -0.022216796875, + "objective/train/value_loss": 0.00462748110294342, + "objective/train/value_max": -0.0007066726684570312, + "objective/train/value_min": -0.95068359375, + "objective/train/value_reward_corr": 0.6966227726296718, + "objective/train/value_std": 0.061553955078125, + "objective/train/weight_avg": 1.0004322528839111, + "objective/train/weighted_lm_loss": 2.8159947395324707, + "objective/train/weights_max": 1.0585501194000244, + "objective/train/weights_min": 0.9144104719161987, + "theoretical_loss": 3.5562928450048386, + "tokens_seen": 1320550400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006057784911717496, + "loss": 1.5188, + "theoretical_loss": 3.556133455992528, + "tokens_seen": 1321205760 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0032344800420105457, + "objective/train/docs_used": 749233, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.165480136871338, + "objective/train/original_loss": 3.1654796600341797, + "objective/train/theoretical_loss": 3.5558945621269413, + "objective/train/tokens_used": 1342648800, + "objective/train/value_avg": -0.026641845703125, + "objective/train/value_loss": 0.004488893318921328, + "objective/train/value_max": -0.00022876262664794922, + "objective/train/value_min": -0.4208984375, + "objective/train/value_reward_corr": 0.5272585193757473, + "objective/train/value_std": 0.043853759765625, + "objective/train/weight_avg": 1.0003455877304077, + "objective/train/weighted_lm_loss": 3.1664016246795654, + "objective/train/weights_max": 1.0414164066314697, + "objective/train/weights_min": 0.9276437759399414, + "theoretical_loss": 3.5558945621269413, + "tokens_seen": 1322188800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006054574638844302, + "loss": 1.5329, + "theoretical_loss": 3.5558786439531653, + "tokens_seen": 1322254336 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006051364365971108, + "loss": 1.5257, + "theoretical_loss": 3.555624090434014, + "tokens_seen": 1323302912 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.003216958837583661, + "objective/train/docs_used": 749806, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2126972675323486, + "objective/train/original_loss": 3.2126972675323486, + "objective/train/theoretical_loss": 3.555496910473588, + "objective/train/tokens_used": 1344287200, + "objective/train/value_avg": -0.0252227783203125, + "objective/train/value_loss": 0.005848527420312166, + "objective/train/value_max": -0.0006361007690429688, + "objective/train/value_min": -0.83251953125, + "objective/train/value_reward_corr": 0.6009934095802617, + "objective/train/value_std": 0.063720703125, + "objective/train/weight_avg": 1.0003505945205688, + "objective/train/weighted_lm_loss": 3.214207410812378, + "objective/train/weights_max": 1.0554910898208618, + "objective/train/weights_min": 0.9089609980583191, + "theoretical_loss": 3.555496910473588, + "tokens_seen": 1323827200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006048154093097914, + "loss": 1.5619, + "theoretical_loss": 3.555369794968252, + "tokens_seen": 1324351488 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006044943820224719, + "loss": 1.5431, + "theoretical_loss": 3.555115757090271, + "tokens_seen": 1325400064 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.005103923846036196, + "objective/train/docs_used": 750658, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.17714786529541, + "objective/train/original_loss": 3.17714786529541, + "objective/train/theoretical_loss": 3.5550998882650093, + "objective/train/tokens_used": 1345925600, + "objective/train/value_avg": -0.01091766357421875, + "objective/train/value_loss": 0.0007437030435539782, + "objective/train/value_max": -0.0003542900085449219, + "objective/train/value_min": -0.492919921875, + "objective/train/value_reward_corr": 0.18970679377881944, + "objective/train/value_std": 0.0131378173828125, + "objective/train/weight_avg": 1.0005141496658325, + "objective/train/weighted_lm_loss": 3.1789746284484863, + "objective/train/weights_max": 1.0504523515701294, + "objective/train/weights_min": 0.9528183341026306, + "theoretical_loss": 3.5550998882650093, + "tokens_seen": 1325465600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006041733547351524, + "loss": 1.5487, + "theoretical_loss": 3.554861976335671, + "tokens_seen": 1326448640 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.003432583762332797, + "objective/train/docs_used": 751457, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.333631753921509, + "objective/train/original_loss": 3.333631992340088, + "objective/train/theoretical_loss": 3.5547034937286472, + "objective/train/tokens_used": 1347564000, + "objective/train/value_avg": -0.043365478515625, + "objective/train/value_loss": 0.015764083713293076, + "objective/train/value_max": -0.0006589889526367188, + "objective/train/value_min": -0.982421875, + "objective/train/value_reward_corr": 0.42294396759692315, + "objective/train/value_std": 0.08062744140625, + "objective/train/weight_avg": 1.0004210472106934, + "objective/train/weighted_lm_loss": 3.3328962326049805, + "objective/train/weights_max": 1.0794461965560913, + "objective/train/weights_min": 0.9076870083808899, + "theoretical_loss": 3.5547034937286472, + "tokens_seen": 1327104000 + }, + { + "epoch": 0.4, + "learning_rate": 0.000603852327447833, + "loss": 1.5553, + "theoretical_loss": 3.5546084522412533, + "tokens_seen": 1327497216 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006035313001605136, + "loss": 1.5513, + "theoretical_loss": 3.5543551843450203, + "tokens_seen": 1328545792 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.007162427995353937, + "objective/train/docs_used": 752138, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1562352180480957, + "objective/train/original_loss": 3.1562352180480957, + "objective/train/theoretical_loss": 3.5543077250991146, + "objective/train/tokens_used": 1349202400, + "objective/train/value_avg": -0.0159454345703125, + "objective/train/value_loss": 0.0022080282215029, + "objective/train/value_max": -0.00029587745666503906, + "objective/train/value_min": -0.499267578125, + "objective/train/value_reward_corr": 0.1713643836712025, + "objective/train/value_std": 0.0188140869140625, + "objective/train/weight_avg": 1.0007271766662598, + "objective/train/weighted_lm_loss": 3.158743381500244, + "objective/train/weights_max": 1.0452157258987427, + "objective/train/weights_min": 0.9187234044075012, + "theoretical_loss": 3.5543077250991146, + "tokens_seen": 1328742400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006032102728731942, + "loss": 1.5208, + "theoretical_loss": 3.5541021721861696, + "tokens_seen": 1329594368 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0007740987348370254, + "objective/train/docs_used": 753163, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.177553415298462, + "objective/train/original_loss": 3.177553653717041, + "objective/train/theoretical_loss": 3.5539125806181584, + "objective/train/tokens_used": 1350840800, + "objective/train/value_avg": -0.0244293212890625, + "objective/train/value_loss": 0.0030373865738511086, + "objective/train/value_max": -0.000415802001953125, + "objective/train/value_min": -0.53076171875, + "objective/train/value_reward_corr": 0.8403859645217198, + "objective/train/value_std": 0.06787109375, + "objective/train/weight_avg": 1.0000925064086914, + "objective/train/weighted_lm_loss": 3.1776411533355713, + "objective/train/weights_max": 1.0473192930221558, + "objective/train/weights_min": 0.9473446607589722, + "theoretical_loss": 3.5539125806181584, + "tokens_seen": 1330380800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006028892455858748, + "loss": 1.5336, + "theoretical_loss": 3.5538494153050895, + "tokens_seen": 1330642944 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006025682182985554, + "loss": 1.5425, + "theoretical_loss": 3.5535969132433554, + "tokens_seen": 1331691520 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.010210355743765831, + "objective/train/docs_used": 753870, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9311962127685547, + "objective/train/original_loss": 2.9311957359313965, + "objective/train/theoretical_loss": 3.553518058534624, + "objective/train/tokens_used": 1352479200, + "objective/train/value_avg": -0.032196044921875, + "objective/train/value_loss": 0.0029862115625292063, + "objective/train/value_max": -0.0007824897766113281, + "objective/train/value_min": -0.6923828125, + "objective/train/value_reward_corr": 0.6883359712075157, + "objective/train/value_std": 0.05511474609375, + "objective/train/weight_avg": 1.0010358095169067, + "objective/train/weighted_lm_loss": 2.934999704360962, + "objective/train/weights_max": 1.0413177013397217, + "objective/train/weights_min": 0.9112344980239868, + "theoretical_loss": 3.553518058534624, + "tokens_seen": 1332019200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006022471910112359, + "loss": 1.5381, + "theoretical_loss": 3.5533446655437277, + "tokens_seen": 1332740096 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": -0.00434036273509264, + "objective/train/docs_used": 755212, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6811165809631348, + "objective/train/original_loss": 3.681117057800293, + "objective/train/theoretical_loss": 3.5531241571044148, + "objective/train/tokens_used": 1354117600, + "objective/train/value_avg": -0.019775390625, + "objective/train/value_loss": 0.008060269057750702, + "objective/train/value_max": -0.0002551078796386719, + "objective/train/value_min": -0.9775390625, + "objective/train/value_reward_corr": 0.5145436890836487, + "objective/train/value_std": 0.0482177734375, + "objective/train/weight_avg": 0.9996054768562317, + "objective/train/weighted_lm_loss": 3.67952561378479, + "objective/train/weights_max": 1.072856068611145, + "objective/train/weights_min": 0.9075667262077332, + "theoretical_loss": 3.5531241571044148, + "tokens_seen": 1333657600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006019261637239165, + "loss": 1.5568, + "theoretical_loss": 3.5530926717501448, + "tokens_seen": 1333788672 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006016051364365971, + "loss": 1.5037, + "theoretical_loss": 3.5528409314077205, + "tokens_seen": 1334837248 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": -0.02070213481783867, + "objective/train/docs_used": 755960, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.340625047683716, + "objective/train/original_loss": 3.3406248092651367, + "objective/train/theoretical_loss": 3.5527308745904564, + "objective/train/tokens_used": 1355756000, + "objective/train/value_avg": -0.021484375, + "objective/train/value_loss": 0.01104133203625679, + "objective/train/value_max": -0.0004425048828125, + "objective/train/value_min": -0.8837890625, + "objective/train/value_reward_corr": 0.16729551082512814, + "objective/train/value_std": 0.0364990234375, + "objective/train/weight_avg": 0.9979844093322754, + "objective/train/weighted_lm_loss": 3.339179515838623, + "objective/train/weights_max": 1.082771897315979, + "objective/train/weights_min": 0.9154813289642334, + "theoretical_loss": 3.5527308745904564, + "tokens_seen": 1335296000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006012841091492777, + "loss": 1.5197, + "theoretical_loss": 3.5525894440627415, + "tokens_seen": 1335885824 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.007445581257343292, + "objective/train/docs_used": 757325, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.333979606628418, + "objective/train/original_loss": 3.333979606628418, + "objective/train/theoretical_loss": 3.5523382092626603, + "objective/train/tokens_used": 1357394400, + "objective/train/value_avg": -0.012359619140625, + "objective/train/value_loss": 0.0008789384737610817, + "objective/train/value_max": -0.0005507469177246094, + "objective/train/value_min": -0.84033203125, + "objective/train/value_reward_corr": 0.46299854592325224, + "objective/train/value_std": 0.0184478759765625, + "objective/train/weight_avg": 1.000748872756958, + "objective/train/weighted_lm_loss": 3.336425304412842, + "objective/train/weights_max": 1.027280330657959, + "objective/train/weights_min": 0.910315215587616, + "theoretical_loss": 3.5523382092626603, + "tokens_seen": 1336934400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006009630818619582, + "loss": 1.4982, + "theoretical_loss": 3.5523382092626603, + "tokens_seen": 1336934400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006006420545746388, + "loss": 1.5532, + "theoretical_loss": 3.552087226556094, + "tokens_seen": 1337982976 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -0.0020090739708393812, + "objective/train/docs_used": 758060, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4109883308410645, + "objective/train/original_loss": 3.4109885692596436, + "objective/train/theoretical_loss": 3.551946159397887, + "objective/train/tokens_used": 1359032800, + "objective/train/value_avg": -0.0192108154296875, + "objective/train/value_loss": 0.009151045233011246, + "objective/train/value_max": -0.000690460205078125, + "objective/train/value_min": -0.97021484375, + "objective/train/value_reward_corr": 0.4692574057743842, + "objective/train/value_std": 0.044830322265625, + "objective/train/weight_avg": 0.999843955039978, + "objective/train/weighted_lm_loss": 3.4099133014678955, + "objective/train/weights_max": 1.0678550004959106, + "objective/train/weights_min": 0.9069958925247192, + "theoretical_loss": 3.551946159397887, + "tokens_seen": 1338572800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006003210272873194, + "loss": 1.5248, + "theoretical_loss": 3.5518364954928185, + "tokens_seen": 1339031552 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006, + "loss": 1.5456, + "theoretical_loss": 3.551586015623767, + "tokens_seen": 1340080128 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0059503731317818165, + "objective/train/docs_used": 759300, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.204714775085449, + "objective/train/original_loss": 3.204714775085449, + "objective/train/theoretical_loss": 3.5515547232799087, + "objective/train/tokens_used": 1360671200, + "objective/train/value_avg": -0.040863037109375, + "objective/train/value_loss": 0.01111854799091816, + "objective/train/value_max": -0.0006413459777832031, + "objective/train/value_min": -0.978515625, + "objective/train/value_reward_corr": 0.628428440192662, + "objective/train/value_std": 0.07794189453125, + "objective/train/weight_avg": 1.0006496906280518, + "objective/train/weighted_lm_loss": 3.2062337398529053, + "objective/train/weights_max": 1.1027348041534424, + "objective/train/weights_min": 0.9073149561882019, + "theoretical_loss": 3.5515547232799087, + "tokens_seen": 1340211200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005996789727126806, + "loss": 1.5506, + "theoretical_loss": 3.5513357865010233, + "tokens_seen": 1341128704 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0073114181868731976, + "objective/train/docs_used": 759950, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.003657341003418, + "objective/train/original_loss": 3.003657341003418, + "objective/train/theoretical_loss": 3.5511638991993744, + "objective/train/tokens_used": 1362309600, + "objective/train/value_avg": -0.0182342529296875, + "objective/train/value_loss": 0.0025955350138247013, + "objective/train/value_max": -0.0006265640258789062, + "objective/train/value_min": -0.86328125, + "objective/train/value_reward_corr": 0.37348556806734196, + "objective/train/value_std": 0.029083251953125, + "objective/train/weight_avg": 1.0007438659667969, + "objective/train/weighted_lm_loss": 3.0059666633605957, + "objective/train/weights_max": 1.0443652868270874, + "objective/train/weights_min": 0.9102250337600708, + "theoretical_loss": 3.5511638991993744, + "tokens_seen": 1341849600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005993579454253612, + "loss": 1.5214, + "theoretical_loss": 3.5510858076778202, + "tokens_seen": 1342177280 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005990369181380418, + "loss": 1.5206, + "theoretical_loss": 3.5508360787085342, + "tokens_seen": 1343225856 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -0.003107719589024782, + "objective/train/docs_used": 761102, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.126703977584839, + "objective/train/original_loss": 3.1267037391662598, + "objective/train/theoretical_loss": 3.550773685453774, + "objective/train/tokens_used": 1363948000, + "objective/train/value_avg": -0.0218658447265625, + "objective/train/value_loss": 0.005721935071051121, + "objective/train/value_max": -0.00034999847412109375, + "objective/train/value_min": -0.9658203125, + "objective/train/value_reward_corr": 0.5243597278903922, + "objective/train/value_std": 0.045806884765625, + "objective/train/weight_avg": 0.9997172951698303, + "objective/train/weighted_lm_loss": 3.127216100692749, + "objective/train/weights_max": 1.0584404468536377, + "objective/train/weights_min": 0.9080327749252319, + "theoretical_loss": 3.550773685453774, + "tokens_seen": 1343488000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005987158908507223, + "loss": 1.5447, + "theoretical_loss": 3.5505865991486827, + "tokens_seen": 1344274432 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.004720005672425032, + "objective/train/docs_used": 761644, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1292078495025635, + "objective/train/original_loss": 3.1292078495025635, + "objective/train/theoretical_loss": 3.550384080347402, + "objective/train/tokens_used": 1365586400, + "objective/train/value_avg": -0.010498046875, + "objective/train/value_loss": 0.0006460025324486196, + "objective/train/value_max": -0.0006022453308105469, + "objective/train/value_min": -0.427490234375, + "objective/train/value_reward_corr": 0.26275791390684333, + "objective/train/value_std": 0.01279449462890625, + "objective/train/weight_avg": 1.0004751682281494, + "objective/train/weighted_lm_loss": 3.131058931350708, + "objective/train/weights_max": 1.0274112224578857, + "objective/train/weights_min": 0.9498521089553833, + "theoretical_loss": 3.550384080347402, + "tokens_seen": 1345126400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005983948635634029, + "loss": 1.5232, + "theoretical_loss": 3.5503373685549184, + "tokens_seen": 1345323008 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005980738362760835, + "loss": 1.5178, + "theoretical_loss": 3.5500883864850294, + "tokens_seen": 1346371584 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.007197202183306217, + "objective/train/docs_used": 762571, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.403947114944458, + "objective/train/original_loss": 3.4039466381073, + "objective/train/theoretical_loss": 3.5499950821913204, + "objective/train/tokens_used": 1367224800, + "objective/train/value_avg": -0.019378662109375, + "objective/train/value_loss": 0.0021424058359116316, + "objective/train/value_max": -0.0005192756652832031, + "objective/train/value_min": -0.828125, + "objective/train/value_reward_corr": 0.338479116576493, + "objective/train/value_std": 0.0232696533203125, + "objective/train/weight_avg": 1.000730276107788, + "objective/train/weighted_lm_loss": 3.4062278270721436, + "objective/train/weights_max": 1.0306316614151, + "objective/train/weights_min": 0.921978235244751, + "theoretical_loss": 3.5499950821913204, + "tokens_seen": 1346764800 + }, + { + "epoch": 0.41, + "learning_rate": 0.000597752808988764, + "loss": 1.5547, + "theoretical_loss": 3.5498396524979308, + "tokens_seen": 1347420160 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -0.002816193737089634, + "objective/train/docs_used": 763291, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.993004560470581, + "objective/train/original_loss": 2.993004322052002, + "objective/train/theoretical_loss": 3.549606689303328, + "objective/train/tokens_used": 1368863200, + "objective/train/value_avg": -0.0123748779296875, + "objective/train/value_loss": 0.001632938627153635, + "objective/train/value_max": -0.000537872314453125, + "objective/train/value_min": -0.247314453125, + "objective/train/value_reward_corr": 0.5073309808549291, + "objective/train/value_std": 0.01364898681640625, + "objective/train/weight_avg": 0.9997265338897705, + "objective/train/weighted_lm_loss": 2.993623733520508, + "objective/train/weights_max": 1.0248154401779175, + "objective/train/weights_min": 0.9198179841041565, + "theoretical_loss": 3.549606689303328, + "tokens_seen": 1348403200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005974317817014446, + "loss": 1.5574, + "theoretical_loss": 3.5495911661536637, + "tokens_seen": 1348468736 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005971107544141252, + "loss": 1.5527, + "theoretical_loss": 3.5493429270133907, + "tokens_seen": 1349517312 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.008216803893446922, + "objective/train/docs_used": 764652, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6845717430114746, + "objective/train/original_loss": 2.6845715045928955, + "objective/train/theoretical_loss": 3.549218900007921, + "objective/train/tokens_used": 1370501600, + "objective/train/value_avg": -0.021820068359375, + "objective/train/value_loss": 0.0025984873063862324, + "objective/train/value_max": -0.0005254745483398438, + "objective/train/value_min": -0.794921875, + "objective/train/value_reward_corr": 0.42157284798426586, + "objective/train/value_std": 0.038604736328125, + "objective/train/weight_avg": 1.0008347034454346, + "objective/train/weighted_lm_loss": 2.6868984699249268, + "objective/train/weights_max": 1.0757464170455933, + "objective/train/weights_min": 0.9414330720901489, + "theoretical_loss": 3.549218900007921, + "tokens_seen": 1350041600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005967897271268058, + "loss": 1.4987, + "theoretical_loss": 3.549094934639392, + "tokens_seen": 1350565888 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005964686998394863, + "loss": 1.5172, + "theoretical_loss": 3.5488471885950625, + "tokens_seen": 1351614464 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.005588335916399956, + "objective/train/docs_used": 765372, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.327258586883545, + "objective/train/original_loss": 3.327258586883545, + "objective/train/theoretical_loss": 3.5488317126362596, + "objective/train/tokens_used": 1372140000, + "objective/train/value_avg": -0.01461029052734375, + "objective/train/value_loss": 0.0015783552080392838, + "objective/train/value_max": -0.0006022453308105469, + "objective/train/value_min": -0.64208984375, + "objective/train/value_reward_corr": 0.3313165611087738, + "objective/train/value_std": 0.0228271484375, + "objective/train/weight_avg": 1.0005667209625244, + "objective/train/weighted_lm_loss": 3.3291263580322266, + "objective/train/weights_max": 1.03770911693573, + "objective/train/weights_min": 0.9518227577209473, + "theoretical_loss": 3.5488317126362596, + "tokens_seen": 1351680000 + }, + { + "epoch": 0.41, + "learning_rate": 0.000596147672552167, + "loss": 1.5353, + "theoretical_loss": 3.5485996884449076, + "tokens_seen": 1352663040 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -0.006767845246940851, + "objective/train/docs_used": 766365, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.593052625656128, + "objective/train/original_loss": 2.593052625656128, + "objective/train/theoretical_loss": 3.5484451255261353, + "objective/train/tokens_used": 1373778400, + "objective/train/value_avg": -0.01131439208984375, + "objective/train/value_loss": 0.0076867686584591866, + "objective/train/value_max": -0.0005154609680175781, + "objective/train/value_min": -0.37548828125, + "objective/train/value_reward_corr": 0.2065338869491287, + "objective/train/value_std": 0.0150909423828125, + "objective/train/weight_avg": 0.9993607997894287, + "objective/train/weighted_lm_loss": 2.592104196548462, + "objective/train/weights_max": 1.0380568504333496, + "objective/train/weights_min": 0.9175110459327698, + "theoretical_loss": 3.5484451255261353, + "tokens_seen": 1353318400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005958266452648476, + "loss": 1.4823, + "theoretical_loss": 3.5483524337545385, + "tokens_seen": 1353711616 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005955056179775281, + "loss": 1.5141, + "theoretical_loss": 3.5481054240906698, + "tokens_seen": 1354760192 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.003436657600104809, + "objective/train/docs_used": 767078, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.83178448677063, + "objective/train/original_loss": 2.83178448677063, + "objective/train/theoretical_loss": 3.5480591370219345, + "objective/train/tokens_used": 1375416800, + "objective/train/value_avg": -0.0141448974609375, + "objective/train/value_loss": 0.0015930543886497617, + "objective/train/value_max": -0.0006589889526367188, + "objective/train/value_min": -0.66357421875, + "objective/train/value_reward_corr": 0.22718544617430825, + "objective/train/value_std": 0.0182647705078125, + "objective/train/weight_avg": 1.0003515481948853, + "objective/train/weighted_lm_loss": 2.833405017852783, + "objective/train/weights_max": 1.0673798322677612, + "objective/train/weights_min": 0.9184077382087708, + "theoretical_loss": 3.5480591370219345, + "tokens_seen": 1354956800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005951845906902087, + "loss": 1.5425, + "theoretical_loss": 3.547858659021117, + "tokens_seen": 1355808768 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.001355629414319992, + "objective/train/docs_used": 768221, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.669388771057129, + "objective/train/original_loss": 2.66938853263855, + "objective/train/theoretical_loss": 3.5476737454746035, + "objective/train/tokens_used": 1377055200, + "objective/train/value_avg": -0.046478271484375, + "objective/train/value_loss": 0.011763099581003189, + "objective/train/value_max": -0.0006437301635742188, + "objective/train/value_min": -0.9775390625, + "objective/train/value_reward_corr": 0.6487565018800695, + "objective/train/value_std": 0.1136474609375, + "objective/train/weight_avg": 1.0001938343048096, + "objective/train/weighted_lm_loss": 2.6684393882751465, + "objective/train/weights_max": 1.0886508226394653, + "objective/train/weights_min": 0.9062639474868774, + "theoretical_loss": 3.5476737454746035, + "tokens_seen": 1356595200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005948635634028893, + "loss": 1.5167, + "theoretical_loss": 3.5476121381147894, + "tokens_seen": 1356857344 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005945425361155699, + "loss": 1.5126, + "theoretical_loss": 3.5473658609416896, + "tokens_seen": 1357905920 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -0.0009096242138184607, + "objective/train/docs_used": 768734, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7921087741851807, + "objective/train/original_loss": 2.7921087741851807, + "objective/train/theoretical_loss": 3.5472889492416173, + "objective/train/tokens_used": 1378693600, + "objective/train/value_avg": -0.0270233154296875, + "objective/train/value_loss": 0.005355180241167545, + "objective/train/value_max": -0.0004305839538574219, + "objective/train/value_min": -0.697265625, + "objective/train/value_reward_corr": 0.5620619294282492, + "objective/train/value_std": 0.036346435546875, + "objective/train/weight_avg": 0.999935507774353, + "objective/train/weighted_lm_loss": 2.790698766708374, + "objective/train/weights_max": 1.0525286197662354, + "objective/train/weights_min": 0.9375872611999512, + "theoretical_loss": 3.5472889492416173, + "tokens_seen": 1358233600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005942215088282504, + "loss": 1.5003, + "theoretical_loss": 3.5471198270729083, + "tokens_seen": 1358954496 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0028344702441245317, + "objective/train/docs_used": 769315, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1404590606689453, + "objective/train/original_loss": 3.1404592990875244, + "objective/train/theoretical_loss": 3.5469047466869448, + "objective/train/tokens_used": 1380332000, + "objective/train/value_avg": -0.017852783203125, + "objective/train/value_loss": 0.0030133293475955725, + "objective/train/value_max": -0.0005359649658203125, + "objective/train/value_min": -0.6982421875, + "objective/train/value_reward_corr": 0.4313196307320537, + "objective/train/value_std": 0.02362060546875, + "objective/train/weight_avg": 1.000298261642456, + "objective/train/weighted_lm_loss": 3.1414506435394287, + "objective/train/weights_max": 1.0502740144729614, + "objective/train/weights_min": 0.9206303358078003, + "theoretical_loss": 3.5469047466869448, + "tokens_seen": 1359872000 + }, + { + "epoch": 0.41, + "learning_rate": 0.000593900481540931, + "loss": 1.5431, + "theoretical_loss": 3.546874036080621, + "tokens_seen": 1360003072 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005935794542536116, + "loss": 1.5434, + "theoretical_loss": 3.5466284875380856, + "tokens_seen": 1361051648 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.005881565157324076, + "objective/train/docs_used": 770761, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.093782901763916, + "objective/train/original_loss": 3.093782901763916, + "objective/train/theoretical_loss": 3.546521136181015, + "objective/train/tokens_used": 1381970400, + "objective/train/value_avg": -0.01947021484375, + "objective/train/value_loss": 0.003097232198342681, + "objective/train/value_max": -0.0006880760192871094, + "objective/train/value_min": -0.70263671875, + "objective/train/value_reward_corr": 0.6116560191457596, + "objective/train/value_std": 0.0345458984375, + "objective/train/weight_avg": 1.000603437423706, + "objective/train/weighted_lm_loss": 3.094970464706421, + "objective/train/weights_max": 1.0338245630264282, + "objective/train/weights_min": 0.9182112812995911, + "theoretical_loss": 3.546521136181015, + "tokens_seen": 1361510400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005932584269662922, + "loss": 1.5285, + "theoretical_loss": 3.546383181019637, + "tokens_seen": 1362100224 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0047117131762206554, + "objective/train/docs_used": 771430, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4651472568511963, + "objective/train/original_loss": 3.465147018432617, + "objective/train/theoretical_loss": 3.5461381161006846, + "objective/train/tokens_used": 1383608800, + "objective/train/value_avg": -0.01165008544921875, + "objective/train/value_loss": 0.0016133618773892522, + "objective/train/value_max": -0.00044083595275878906, + "objective/train/value_min": -0.379150390625, + "objective/train/value_reward_corr": 0.2273451139681308, + "objective/train/value_std": 0.0157012939453125, + "objective/train/weight_avg": 1.0004791021347046, + "objective/train/weighted_lm_loss": 3.466648578643799, + "objective/train/weights_max": 1.0363702774047852, + "objective/train/weights_min": 0.9188045859336853, + "theoretical_loss": 3.5461381161006846, + "tokens_seen": 1363148800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005929373996789727, + "loss": 1.5013, + "theoretical_loss": 3.5461381161006846, + "tokens_seen": 1363148800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005926163723916533, + "loss": 1.5304, + "theoretical_loss": 3.5458932923577082, + "tokens_seen": 1364197376 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0026061912067234516, + "objective/train/docs_used": 772731, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9121222496032715, + "objective/train/original_loss": 2.9121222496032715, + "objective/train/theoretical_loss": 3.545755684829202, + "objective/train/tokens_used": 1385247200, + "objective/train/value_avg": -0.0125274658203125, + "objective/train/value_loss": 0.003927260637283325, + "objective/train/value_max": -0.00055694580078125, + "objective/train/value_min": -0.72119140625, + "objective/train/value_reward_corr": 0.350071486109338, + "objective/train/value_std": 0.01959228515625, + "objective/train/weight_avg": 1.0002797842025757, + "objective/train/weighted_lm_loss": 2.9125027656555176, + "objective/train/weights_max": 1.0321142673492432, + "objective/train/weights_min": 0.907707154750824, + "theoretical_loss": 3.545755684829202, + "tokens_seen": 1364787200 + }, + { + "epoch": 0.41, + "learning_rate": 0.000592295345104334, + "loss": 1.5114, + "theoretical_loss": 3.5456487093682547, + "tokens_seen": 1365245952 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005919743178170145, + "loss": 1.5349, + "theoretical_loss": 3.5454043667109367, + "tokens_seen": 1366294528 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.005871927365660667, + "objective/train/docs_used": 773461, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.111125946044922, + "objective/train/original_loss": 3.111125946044922, + "objective/train/theoretical_loss": 3.545373840756179, + "objective/train/tokens_used": 1386885600, + "objective/train/value_avg": -0.0164031982421875, + "objective/train/value_loss": 0.0029293422121554613, + "objective/train/value_max": -0.00036835670471191406, + "objective/train/value_min": -0.76025390625, + "objective/train/value_reward_corr": 0.28840420545024925, + "objective/train/value_std": 0.035125732421875, + "objective/train/weight_avg": 1.0006017684936523, + "objective/train/weighted_lm_loss": 3.1122324466705322, + "objective/train/weights_max": 1.0496221780776978, + "objective/train/weights_min": 0.9550414085388184, + "theoretical_loss": 3.545373840756179, + "tokens_seen": 1366425600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005916532905296951, + "loss": 1.5321, + "theoretical_loss": 3.545160263965424, + "tokens_seen": 1367343104 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -0.0002742393407970667, + "objective/train/docs_used": 774789, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1569418907165527, + "objective/train/original_loss": 3.1569418907165527, + "objective/train/theoretical_loss": 3.5449925822775556, + "objective/train/tokens_used": 1388524000, + "objective/train/value_avg": -0.040191650390625, + "objective/train/value_loss": 0.011423349380493164, + "objective/train/value_max": -0.0004992485046386719, + "objective/train/value_min": -0.99609375, + "objective/train/value_reward_corr": 0.7956347802001978, + "objective/train/value_std": 0.126953125, + "objective/train/weight_avg": 1.0000293254852295, + "objective/train/weighted_lm_loss": 3.1574337482452393, + "objective/train/weights_max": 1.1009340286254883, + "objective/train/weights_min": 0.9067265391349792, + "theoretical_loss": 3.5449925822775556, + "tokens_seen": 1368064000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005913322632423757, + "loss": 1.5206, + "theoretical_loss": 3.544916400712445, + "tokens_seen": 1368391680 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005910112359550563, + "loss": 1.486, + "theoretical_loss": 3.5446727765337815, + "tokens_seen": 1369440256 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0056616379879415035, + "objective/train/docs_used": 775516, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9528868198394775, + "objective/train/original_loss": 2.9528870582580566, + "objective/train/theoretical_loss": 3.5446119077955673, + "objective/train/tokens_used": 1390162400, + "objective/train/value_avg": -0.0230560302734375, + "objective/train/value_loss": 0.0032790794502943754, + "objective/train/value_max": -0.0006265640258789062, + "objective/train/value_min": -0.728515625, + "objective/train/value_reward_corr": 0.45251428264257054, + "objective/train/value_std": 0.03338623046875, + "objective/train/weight_avg": 1.0005824565887451, + "objective/train/weighted_lm_loss": 2.9547955989837646, + "objective/train/weights_max": 1.0545707941055298, + "objective/train/weights_min": 0.9186453223228455, + "theoretical_loss": 3.5446119077955673, + "tokens_seen": 1369702400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005906902086677368, + "loss": 1.5057, + "theoretical_loss": 3.5444293910122644, + "tokens_seen": 1370488832 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0016564648831263185, + "objective/train/docs_used": 776338, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.4988534450531006, + "objective/train/original_loss": 2.4988534450531006, + "objective/train/theoretical_loss": 3.5442318157187147, + "objective/train/tokens_used": 1391800800, + "objective/train/value_avg": -0.012847900390625, + "objective/train/value_loss": 0.00040138105396181345, + "objective/train/value_max": -0.0005636215209960938, + "objective/train/value_min": -0.2088623046875, + "objective/train/value_reward_corr": 0.5787764160332226, + "objective/train/value_std": 0.01482391357421875, + "objective/train/weight_avg": 1.0001676082611084, + "objective/train/weighted_lm_loss": 2.4996755123138428, + "objective/train/weights_max": 1.0192190408706665, + "objective/train/weights_min": 0.9906307458877563, + "theoretical_loss": 3.5442318157187147, + "tokens_seen": 1371340800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005903691813804174, + "loss": 1.5283, + "theoretical_loss": 3.544186243731771, + "tokens_seen": 1371537408 + }, + { + "epoch": 0.42, + "learning_rate": 0.000590048154093098, + "loss": 1.4835, + "theoretical_loss": 3.5439433342772224, + "tokens_seen": 1372585984 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.0023517075460404158, + "objective/train/docs_used": 777606, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3951170444488525, + "objective/train/original_loss": 3.3951172828674316, + "objective/train/theoretical_loss": 3.543852304461728, + "objective/train/tokens_used": 1393439200, + "objective/train/value_avg": -0.0167999267578125, + "objective/train/value_loss": 0.0055540380999445915, + "objective/train/value_max": -0.0004916191101074219, + "objective/train/value_min": -0.84814453125, + "objective/train/value_reward_corr": 0.31761191020258867, + "objective/train/value_std": 0.030975341796875, + "objective/train/weight_avg": 0.9997923374176025, + "objective/train/weighted_lm_loss": 3.396540641784668, + "objective/train/weights_max": 1.0811309814453125, + "objective/train/weights_min": 0.917391300201416, + "theoretical_loss": 3.543852304461728, + "tokens_seen": 1372979200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005897271268057785, + "loss": 1.5179, + "theoretical_loss": 3.5437006622345777, + "tokens_seen": 1373634560 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.009816000238060951, + "objective/train/docs_used": 778164, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0007073879241943, + "objective/train/original_loss": 3.0007073879241943, + "objective/train/theoretical_loss": 3.5434733724455407, + "objective/train/tokens_used": 1395077600, + "objective/train/value_avg": -0.052093505859375, + "objective/train/value_loss": 0.0025513924192637205, + "objective/train/value_max": -0.0004916191101074219, + "objective/train/value_min": -0.837890625, + "objective/train/value_reward_corr": 0.9343458781484453, + "objective/train/value_std": 0.1387939453125, + "objective/train/weight_avg": 1.0009942054748535, + "objective/train/weighted_lm_loss": 3.00285005569458, + "objective/train/weights_max": 1.0331058502197266, + "objective/train/weights_min": 0.9280086755752563, + "theoretical_loss": 3.5434733724455407, + "tokens_seen": 1374617600 + }, + { + "epoch": 0.42, + "learning_rate": 0.000589406099518459, + "loss": 1.5486, + "theoretical_loss": 3.5434582271908344, + "tokens_seen": 1374683136 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005890850722311396, + "loss": 1.5239, + "theoretical_loss": 3.5432160287340206, + "tokens_seen": 1375731712 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.007180539891123772, + "objective/train/docs_used": 779255, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.182274103164673, + "objective/train/original_loss": 3.182274580001831, + "objective/train/theoretical_loss": 3.5430950180972527, + "objective/train/tokens_used": 1396716000, + "objective/train/value_avg": -0.0233917236328125, + "objective/train/value_loss": 0.004136375617235899, + "objective/train/value_max": -0.00039505958557128906, + "objective/train/value_min": -0.8994140625, + "objective/train/value_reward_corr": 0.5682414826669625, + "objective/train/value_std": 0.03790283203125, + "objective/train/weight_avg": 1.000738501548767, + "objective/train/weighted_lm_loss": 3.183889389038086, + "objective/train/weights_max": 1.0491421222686768, + "objective/train/weights_min": 0.9083186388015747, + "theoretical_loss": 3.5430950180972527, + "tokens_seen": 1376256000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005887640449438202, + "loss": 1.5046, + "theoretical_loss": 3.542974066453195, + "tokens_seen": 1376780288 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005884430176565007, + "loss": 1.5027, + "theoretical_loss": 3.5427323399384427, + "tokens_seen": 1377828864 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.0011352883884683251, + "objective/train/docs_used": 780014, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8499410152435303, + "objective/train/original_loss": 2.849940776824951, + "objective/train/theoretical_loss": 3.5427172398501012, + "objective/train/tokens_used": 1398354400, + "objective/train/value_avg": -0.019012451171875, + "objective/train/value_loss": 0.005490961484611034, + "objective/train/value_max": -0.00041413307189941406, + "objective/train/value_min": -0.98876953125, + "objective/train/value_reward_corr": 0.5795311544263695, + "objective/train/value_std": 0.0445556640625, + "objective/train/weight_avg": 0.9999134540557861, + "objective/train/weighted_lm_loss": 2.848825216293335, + "objective/train/weights_max": 1.0577728748321533, + "objective/train/weights_min": 0.907558798789978, + "theoretical_loss": 3.5427172398501012, + "tokens_seen": 1377894400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005881219903691813, + "loss": 1.5287, + "theoretical_loss": 3.5424908487808704, + "tokens_seen": 1378877440 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.006594743579626083, + "objective/train/docs_used": 781246, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.218496799468994, + "objective/train/original_loss": 3.218496561050415, + "objective/train/theoretical_loss": 3.5423400361434307, + "objective/train/tokens_used": 1399992800, + "objective/train/value_avg": -0.026458740234375, + "objective/train/value_loss": 0.0035880464129149914, + "objective/train/value_max": -0.00031256675720214844, + "objective/train/value_min": -0.90283203125, + "objective/train/value_reward_corr": 0.3593142282449997, + "objective/train/value_std": 0.05126953125, + "objective/train/weight_avg": 1.000677466392517, + "objective/train/weighted_lm_loss": 3.220099925994873, + "objective/train/weights_max": 1.0859525203704834, + "objective/train/weights_min": 0.9216371774673462, + "theoretical_loss": 3.5423400361434307, + "tokens_seen": 1379532800 + }, + { + "epoch": 0.42, + "learning_rate": 0.000587800963081862, + "loss": 1.5062, + "theoretical_loss": 3.542249592572605, + "tokens_seen": 1379926016 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005874799357945425, + "loss": 1.5329, + "theoretical_loss": 3.5420085709067894, + "tokens_seen": 1380974592 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.0023481559474021196, + "objective/train/docs_used": 781914, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.137873411178589, + "objective/train/original_loss": 3.137873888015747, + "objective/train/theoretical_loss": 3.541963405422658, + "objective/train/tokens_used": 1401631200, + "objective/train/value_avg": -0.018646240234375, + "objective/train/value_loss": 0.005840775091201067, + "objective/train/value_max": -0.0007066726684570312, + "objective/train/value_min": -0.91162109375, + "objective/train/value_reward_corr": 0.35523751010086557, + "objective/train/value_std": 0.03387451171875, + "objective/train/weight_avg": 0.9997939467430115, + "objective/train/weighted_lm_loss": 3.138418197631836, + "objective/train/weights_max": 1.0561710596084595, + "objective/train/weights_min": 0.9107315540313721, + "theoretical_loss": 3.541963405422658, + "tokens_seen": 1381171200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005871589085072231, + "loss": 1.5391, + "theoretical_loss": 3.541767783377579, + "tokens_seen": 1382023168 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.0005290457629598677, + "objective/train/docs_used": 783265, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1165032386779785, + "objective/train/original_loss": 3.1165032386779785, + "objective/train/theoretical_loss": 3.541587346139247, + "objective/train/tokens_used": 1403269600, + "objective/train/value_avg": -0.0304412841796875, + "objective/train/value_loss": 0.00794096477329731, + "objective/train/value_max": -0.00034737586975097656, + "objective/train/value_min": -0.99267578125, + "objective/train/value_reward_corr": 0.7413046767409197, + "objective/train/value_std": 0.08221435546875, + "objective/train/weight_avg": 0.9999862909317017, + "objective/train/weighted_lm_loss": 3.118035078048706, + "objective/train/weights_max": 1.0771852731704712, + "objective/train/weights_min": 0.9173741936683655, + "theoretical_loss": 3.541587346139247, + "tokens_seen": 1382809600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005868378812199037, + "loss": 1.5071, + "theoretical_loss": 3.5415272295801388, + "tokens_seen": 1383071744 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005865168539325843, + "loss": 1.5219, + "theoretical_loss": 3.5412869091106405, + "tokens_seen": 1384120320 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0005681311595253646, + "objective/train/docs_used": 783958, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6471974849700928, + "objective/train/original_loss": 2.6471974849700928, + "objective/train/theoretical_loss": 3.5412118567506727, + "objective/train/tokens_used": 1404908000, + "objective/train/value_avg": -0.0193328857421875, + "objective/train/value_loss": 0.003528706496581435, + "objective/train/value_max": -0.0005974769592285156, + "objective/train/value_min": -0.912109375, + "objective/train/value_reward_corr": 0.5180837375981003, + "objective/train/value_std": 0.041961669921875, + "objective/train/weight_avg": 1.0000743865966797, + "objective/train/weighted_lm_loss": 2.648221015930176, + "objective/train/weights_max": 1.0472544431686401, + "objective/train/weights_min": 0.9104186296463013, + "theoretical_loss": 3.5412118567506727, + "tokens_seen": 1384448000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005861958266452648, + "loss": 1.5232, + "theoretical_loss": 3.541046821566258, + "tokens_seen": 1385168896 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.001178388367407024, + "objective/train/docs_used": 785103, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.260939359664917, + "objective/train/original_loss": 3.260939359664917, + "objective/train/theoretical_loss": 3.540836935720394, + "objective/train/tokens_used": 1406546400, + "objective/train/value_avg": -0.0107574462890625, + "objective/train/value_loss": 0.0027720897924154997, + "objective/train/value_max": -0.00037860870361328125, + "objective/train/value_min": -0.5654296875, + "objective/train/value_reward_corr": 0.25703275993841773, + "objective/train/value_std": 0.0148162841796875, + "objective/train/weight_avg": 0.9998958110809326, + "objective/train/weighted_lm_loss": 3.260017156600952, + "objective/train/weights_max": 1.0522302389144897, + "objective/train/weights_min": 0.9219712615013123, + "theoretical_loss": 3.540836935720394, + "tokens_seen": 1386086400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005858747993579454, + "loss": 1.5262, + "theoretical_loss": 3.5408069665451656, + "tokens_seen": 1386217472 + }, + { + "epoch": 0.42, + "learning_rate": 0.000585553772070626, + "loss": 1.4836, + "theoretical_loss": 3.5405673436465332, + "tokens_seen": 1387266048 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.007836109958589077, + "objective/train/docs_used": 785794, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.160679340362549, + "objective/train/original_loss": 3.1606791019439697, + "objective/train/theoretical_loss": 3.540462581517824, + "objective/train/tokens_used": 1408184800, + "objective/train/value_avg": -0.0162811279296875, + "objective/train/value_loss": 0.0031901285983622074, + "objective/train/value_max": -0.0003998279571533203, + "objective/train/value_min": -0.921875, + "objective/train/value_reward_corr": 0.40018346462076604, + "objective/train/value_std": 0.0301361083984375, + "objective/train/weight_avg": 1.000799298286438, + "objective/train/weighted_lm_loss": 3.1630682945251465, + "objective/train/weights_max": 1.0491386651992798, + "objective/train/weights_min": 0.9079477190971375, + "theoretical_loss": 3.540462581517824, + "tokens_seen": 1387724800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005852327447833065, + "loss": 1.5118, + "theoretical_loss": 3.540327952470525, + "tokens_seen": 1388314624 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.0003660555521491915, + "objective/train/docs_used": 786491, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7516071796417236, + "objective/train/original_loss": 2.7516071796417236, + "objective/train/theoretical_loss": 3.5400887926182953, + "objective/train/tokens_used": 1409823200, + "objective/train/value_avg": -0.015472412109375, + "objective/train/value_loss": 0.0037825165782123804, + "objective/train/value_max": -0.0005297660827636719, + "objective/train/value_min": -0.61279296875, + "objective/train/value_reward_corr": 0.5621101159656617, + "objective/train/value_std": 0.02777099609375, + "objective/train/weight_avg": 0.9999820590019226, + "objective/train/weighted_lm_loss": 2.7516305446624756, + "objective/train/weights_max": 1.027550458908081, + "objective/train/weights_min": 0.9074327349662781, + "theoretical_loss": 3.5400887926182953, + "tokens_seen": 1389363200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005849117174959871, + "loss": 1.5307, + "theoretical_loss": 3.5400887926182953, + "tokens_seen": 1389363200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005845906902086677, + "loss": 1.4992, + "theoretical_loss": 3.5398498636919835, + "tokens_seen": 1390411776 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.0007710073259659111, + "objective/train/docs_used": 787676, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7877981662750244, + "objective/train/original_loss": 2.7877979278564453, + "objective/train/theoretical_loss": 3.539715567503036, + "objective/train/tokens_used": 1411461600, + "objective/train/value_avg": -0.02178955078125, + "objective/train/value_loss": 0.004289465490728617, + "objective/train/value_max": -0.00051116943359375, + "objective/train/value_min": -0.7109375, + "objective/train/value_reward_corr": 0.6688189566631859, + "objective/train/value_std": 0.039794921875, + "objective/train/weight_avg": 0.9999440908432007, + "objective/train/weighted_lm_loss": 2.7855894565582275, + "objective/train/weights_max": 1.0587096214294434, + "objective/train/weights_min": 0.9125468730926514, + "theoretical_loss": 3.539715567503036, + "tokens_seen": 1391001600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005842696629213483, + "loss": 1.4915, + "theoretical_loss": 3.5396111652947146, + "tokens_seen": 1391460352 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005839486356340289, + "loss": 1.5554, + "theoretical_loss": 3.539372697030594, + "tokens_seen": 1392508928 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0008818634669296443, + "objective/train/docs_used": 788318, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.731383800506592, + "objective/train/original_loss": 2.731383800506592, + "objective/train/theoretical_loss": 3.5393429046591365, + "objective/train/tokens_used": 1413100000, + "objective/train/value_avg": -0.0078277587890625, + "objective/train/value_loss": 0.00121718377340585, + "objective/train/value_max": -0.00019109249114990234, + "objective/train/value_min": -0.1300048828125, + "objective/train/value_reward_corr": 0.08998822370516754, + "objective/train/value_std": 0.007678985595703125, + "objective/train/weight_avg": 1.0000941753387451, + "objective/train/weighted_lm_loss": 2.731705665588379, + "objective/train/weights_max": 1.0091441869735718, + "objective/train/weights_min": 0.9283415675163269, + "theoretical_loss": 3.5393429046591365, + "tokens_seen": 1392640000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005836276083467095, + "loss": 1.4892, + "theoretical_loss": 3.5391344585047024, + "tokens_seen": 1393557504 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.005147413816303015, + "objective/train/docs_used": 789787, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0088322162628174, + "objective/train/original_loss": 3.0088319778442383, + "objective/train/theoretical_loss": 3.538970802579521, + "objective/train/tokens_used": 1414738400, + "objective/train/value_avg": -0.0137176513671875, + "objective/train/value_loss": 0.002011206466704607, + "objective/train/value_max": -0.0004222393035888672, + "objective/train/value_min": -0.841796875, + "objective/train/value_reward_corr": 0.5800893572152546, + "objective/train/value_std": 0.0345458984375, + "objective/train/weight_avg": 1.000524640083313, + "objective/train/weighted_lm_loss": 3.0102412700653076, + "objective/train/weights_max": 1.0634888410568237, + "objective/train/weights_min": 0.9194169640541077, + "theoretical_loss": 3.538970802579521, + "tokens_seen": 1394278400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005833065810593901, + "loss": 1.5009, + "theoretical_loss": 3.538896449323098, + "tokens_seen": 1394606080 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005829855537720707, + "loss": 1.5085, + "theoretical_loss": 3.538658669092807, + "tokens_seen": 1395654656 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.00019216409418731928, + "objective/train/docs_used": 790377, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8214304447174072, + "objective/train/original_loss": 2.8214304447174072, + "objective/train/theoretical_loss": 3.5385992597629174, + "objective/train/tokens_used": 1416376800, + "objective/train/value_avg": -0.0292205810546875, + "objective/train/value_loss": 0.01081965770572424, + "objective/train/value_max": -0.0002269744873046875, + "objective/train/value_min": -0.994140625, + "objective/train/value_reward_corr": 0.5698403119230353, + "objective/train/value_std": 0.08294677734375, + "objective/train/weight_avg": 1.0000338554382324, + "objective/train/weighted_lm_loss": 2.8214595317840576, + "objective/train/weights_max": 1.0877324342727661, + "objective/train/weights_min": 0.9072043299674988, + "theoretical_loss": 3.5385992597629174, + "tokens_seen": 1395916800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005826645264847512, + "loss": 1.5266, + "theoretical_loss": 3.5384211174218247, + "tokens_seen": 1396703232 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.005968758836388588, + "objective/train/docs_used": 791674, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0578320026397705, + "objective/train/original_loss": 3.057831287384033, + "objective/train/theoretical_loss": 3.5382282747138305, + "objective/train/tokens_used": 1418015200, + "objective/train/value_avg": -0.01488494873046875, + "objective/train/value_loss": 0.0005882735713385046, + "objective/train/value_max": -0.0007014274597167969, + "objective/train/value_min": -0.46630859375, + "objective/train/value_reward_corr": 0.4506779457731998, + "objective/train/value_std": 0.0158843994140625, + "objective/train/weight_avg": 1.0005998611450195, + "objective/train/weighted_lm_loss": 3.0598132610321045, + "objective/train/weights_max": 1.03946852684021, + "objective/train/weights_min": 0.9724408388137817, + "theoretical_loss": 3.5382282747138305, + "tokens_seen": 1397555200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005823434991974318, + "loss": 1.5125, + "theoretical_loss": 3.538183793919112, + "tokens_seen": 1397751808 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005820224719101124, + "loss": 1.5131, + "theoretical_loss": 3.53794669819459, + "tokens_seen": 1398800384 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.009094770066440105, + "objective/train/docs_used": 792315, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8204848766326904, + "objective/train/original_loss": 2.8204848766326904, + "objective/train/theoretical_loss": 3.5378578459425087, + "objective/train/tokens_used": 1419653600, + "objective/train/value_avg": -0.01337432861328125, + "objective/train/value_loss": 0.0007932379958219826, + "objective/train/value_max": -0.0009255409240722656, + "objective/train/value_min": -0.91015625, + "objective/train/value_reward_corr": 0.5838365322375103, + "objective/train/value_std": 0.0266265869140625, + "objective/train/weight_avg": 1.000913381576538, + "objective/train/weighted_lm_loss": 2.8229691982269287, + "objective/train/weights_max": 1.033569574356079, + "objective/train/weights_min": 0.9394327402114868, + "theoretical_loss": 3.5378578459425087, + "tokens_seen": 1399193600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005817014446227929, + "loss": 1.5221, + "theoretical_loss": 3.53770982985914, + "tokens_seen": 1399848960 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.006438565906137228, + "objective/train/docs_used": 793814, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.107595205307007, + "objective/train/original_loss": 3.107595443725586, + "objective/train/theoretical_loss": 3.5374879719649206, + "objective/train/tokens_used": 1421292000, + "objective/train/value_avg": -0.0183563232421875, + "objective/train/value_loss": 0.005401185713708401, + "objective/train/value_max": -0.0006771087646484375, + "objective/train/value_min": -0.95849609375, + "objective/train/value_reward_corr": 0.3279968336623006, + "objective/train/value_std": 0.037841796875, + "objective/train/weight_avg": 1.000670313835144, + "objective/train/weighted_lm_loss": 3.1091012954711914, + "objective/train/weights_max": 1.0574367046356201, + "objective/train/weights_min": 0.9103835821151733, + "theoretical_loss": 3.5374879719649206, + "tokens_seen": 1400832000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005813804173354735, + "loss": 1.5476, + "theoretical_loss": 3.537473188524598, + "tokens_seen": 1400897536 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005810593900481541, + "loss": 1.5021, + "theoretical_loss": 3.5372367738037527, + "tokens_seen": 1401946112 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0014302314957603812, + "objective/train/docs_used": 794381, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.63676118850708, + "objective/train/original_loss": 2.636761426925659, + "objective/train/theoretical_loss": 3.537118651302722, + "objective/train/tokens_used": 1422930400, + "objective/train/value_avg": -0.018890380859375, + "objective/train/value_loss": 0.004283830523490906, + "objective/train/value_max": -0.0005235671997070312, + "objective/train/value_min": -0.94873046875, + "objective/train/value_reward_corr": 0.5249154076908218, + "objective/train/value_std": 0.04217529296875, + "objective/train/weight_avg": 1.0001641511917114, + "objective/train/weighted_lm_loss": 2.636704921722412, + "objective/train/weights_max": 1.0771868228912354, + "objective/train/weights_min": 0.9107317328453064, + "theoretical_loss": 3.537118651302722, + "tokens_seen": 1402470400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005807383627608347, + "loss": 1.5143, + "theoretical_loss": 3.5370005853103414, + "tokens_seen": 1402994688 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005804173354735152, + "loss": 1.5136, + "theoretical_loss": 3.5367646226590503, + "tokens_seen": 1404043264 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.003984237089753151, + "objective/train/docs_used": 795506, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.071737766265869, + "objective/train/original_loss": 3.071737766265869, + "objective/train/theoretical_loss": 3.536749882483229, + "objective/train/tokens_used": 1424568800, + "objective/train/value_avg": -0.0177154541015625, + "objective/train/value_loss": 0.0031537411268800497, + "objective/train/value_max": -0.0006880760192871094, + "objective/train/value_min": -0.912109375, + "objective/train/value_reward_corr": 0.4972186390293735, + "objective/train/value_std": 0.034149169921875, + "objective/train/weight_avg": 1.0004140138626099, + "objective/train/weighted_lm_loss": 3.073606252670288, + "objective/train/weights_max": 1.0370980501174927, + "objective/train/weights_min": 0.9075406193733215, + "theoretical_loss": 3.536749882483229, + "tokens_seen": 1404108800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005800963081861959, + "loss": 1.5185, + "theoretical_loss": 3.536528885465506, + "tokens_seen": 1405091840 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0028414814732968807, + "objective/train/docs_used": 796159, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0772199630737305, + "objective/train/original_loss": 3.0772197246551514, + "objective/train/theoretical_loss": 3.536381664039392, + "objective/train/tokens_used": 1426207200, + "objective/train/value_avg": -0.018768310546875, + "objective/train/value_loss": 0.0068572754971683025, + "objective/train/value_max": -0.0005769729614257812, + "objective/train/value_min": -0.97216796875, + "objective/train/value_reward_corr": 0.5149332488391722, + "objective/train/value_std": 0.045501708984375, + "objective/train/weight_avg": 1.0003176927566528, + "objective/train/weighted_lm_loss": 3.077495813369751, + "objective/train/weights_max": 1.0434789657592773, + "objective/train/weights_min": 0.90689617395401, + "theoretical_loss": 3.536381664039392, + "tokens_seen": 1405747200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005797752808988765, + "loss": 1.5154, + "theoretical_loss": 3.536293373346278, + "tokens_seen": 1406140416 + }, + { + "epoch": 0.43, + "learning_rate": 0.000579454253611557, + "loss": 1.5259, + "theoretical_loss": 3.5360580859188713, + "tokens_seen": 1407188992 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.006396403536200523, + "objective/train/docs_used": 797613, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0545108318328857, + "objective/train/original_loss": 3.0545108318328857, + "objective/train/theoretical_loss": 3.536013994509762, + "objective/train/tokens_used": 1427845600, + "objective/train/value_avg": -0.01473236083984375, + "objective/train/value_loss": 0.0021116596180945635, + "objective/train/value_max": -0.0005745887756347656, + "objective/train/value_min": -0.869140625, + "objective/train/value_reward_corr": 0.514385738390287, + "objective/train/value_std": 0.0284576416015625, + "objective/train/weight_avg": 1.00065016746521, + "objective/train/weighted_lm_loss": 3.056159496307373, + "objective/train/weights_max": 1.0483163595199585, + "objective/train/weights_min": 0.9078239798545837, + "theoretical_loss": 3.536013994509762, + "tokens_seen": 1407385600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005791332263242376, + "loss": 1.52, + "theoretical_loss": 3.5358230228017264, + "tokens_seen": 1408237568 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.006268623284995556, + "objective/train/docs_used": 798401, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9931094646453857, + "objective/train/original_loss": 2.993109941482544, + "objective/train/theoretical_loss": 3.5356468724384706, + "objective/train/tokens_used": 1429484000, + "objective/train/value_avg": -0.0110626220703125, + "objective/train/value_loss": 0.0011946200393140316, + "objective/train/value_max": -0.0005483627319335938, + "objective/train/value_min": -0.76611328125, + "objective/train/value_reward_corr": 0.23164890735290725, + "objective/train/value_std": 0.01617431640625, + "objective/train/weight_avg": 1.0006327629089355, + "objective/train/weighted_lm_loss": 2.994798421859741, + "objective/train/weights_max": 1.0344021320343018, + "objective/train/weights_min": 0.9131583571434021, + "theoretical_loss": 3.5356468724384706, + "tokens_seen": 1409024000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005788121990369182, + "loss": 1.5071, + "theoretical_loss": 3.535588183614215, + "tokens_seen": 1409286144 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005784911717495988, + "loss": 1.5482, + "theoretical_loss": 3.535353567976637, + "tokens_seen": 1410334720 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0003374557418283075, + "objective/train/docs_used": 799700, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.899832248687744, + "objective/train/original_loss": 2.899832010269165, + "objective/train/theoretical_loss": 3.5352802963751935, + "objective/train/tokens_used": 1431122400, + "objective/train/value_avg": -0.0260009765625, + "objective/train/value_loss": 0.00838968064635992, + "objective/train/value_max": -0.0004107952117919922, + "objective/train/value_min": -0.98876953125, + "objective/train/value_reward_corr": 0.6579312937645861, + "objective/train/value_std": 0.071533203125, + "objective/train/weight_avg": 1.000075101852417, + "objective/train/weighted_lm_loss": 2.899803638458252, + "objective/train/weights_max": 1.0597318410873413, + "objective/train/weights_min": 0.9072009921073914, + "theoretical_loss": 3.5352802963751935, + "tokens_seen": 1410662400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005781701444622793, + "loss": 1.5341, + "theoretical_loss": 3.5351191755102187, + "tokens_seen": 1411383296 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0028251949697732925, + "objective/train/docs_used": 800754, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.603119373321533, + "objective/train/original_loss": 3.603119134902954, + "objective/train/theoretical_loss": 3.5349142648751304, + "objective/train/tokens_used": 1432760800, + "objective/train/value_avg": -0.02587890625, + "objective/train/value_loss": 0.0037965248338878155, + "objective/train/value_max": -0.0005011558532714844, + "objective/train/value_min": -0.79638671875, + "objective/train/value_reward_corr": 0.4258403025583644, + "objective/train/value_std": 0.040069580078125, + "objective/train/weight_avg": 1.0003012418746948, + "objective/train/weighted_lm_loss": 3.6041805744171143, + "objective/train/weights_max": 1.062379002571106, + "objective/train/weights_min": 0.9107644557952881, + "theoretical_loss": 3.5349142648751304, + "tokens_seen": 1412300800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005778491171749599, + "loss": 1.5191, + "theoretical_loss": 3.534885005837108, + "tokens_seen": 1412431872 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005775280898876405, + "loss": 1.5237, + "theoretical_loss": 3.5346510585803728, + "tokens_seen": 1413480448 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.00774988392367959, + "objective/train/docs_used": 801653, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0131783485412598, + "objective/train/original_loss": 3.013178825378418, + "objective/train/theoretical_loss": 3.5345487764989727, + "objective/train/tokens_used": 1434399200, + "objective/train/value_avg": -0.0219268798828125, + "objective/train/value_loss": 0.0031311702914536, + "objective/train/value_max": -0.00047278404235839844, + "objective/train/value_min": -0.8154296875, + "objective/train/value_reward_corr": 0.5012935359525964, + "objective/train/value_std": 0.04443359375, + "objective/train/weight_avg": 1.0007904767990112, + "objective/train/weighted_lm_loss": 3.014704942703247, + "objective/train/weights_max": 1.074206829071045, + "objective/train/weights_min": 0.9191460609436035, + "theoretical_loss": 3.5345487764989727, + "tokens_seen": 1413939200 + }, + { + "epoch": 0.43, + "learning_rate": 0.000577207062600321, + "loss": 1.5439, + "theoretical_loss": 3.534417333363997, + "tokens_seen": 1414529024 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": -0.005437450483441353, + "objective/train/docs_used": 802505, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9237234592437744, + "objective/train/original_loss": 2.9237234592437744, + "objective/train/theoretical_loss": 3.5341838298128803, + "objective/train/tokens_used": 1436037600, + "objective/train/value_avg": -0.028106689453125, + "objective/train/value_loss": 0.00813012383878231, + "objective/train/value_max": -0.0007410049438476562, + "objective/train/value_min": -0.64990234375, + "objective/train/value_reward_corr": 0.6013534747782827, + "objective/train/value_std": 0.03955078125, + "objective/train/weight_avg": 0.9994962215423584, + "objective/train/weighted_lm_loss": 2.9231884479522705, + "objective/train/weights_max": 1.0330514907836914, + "objective/train/weights_min": 0.9205021262168884, + "theoretical_loss": 3.5341838298128803, + "tokens_seen": 1415577600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005768860353130016, + "loss": 1.5024, + "theoretical_loss": 3.5341838298128803, + "tokens_seen": 1415577600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005765650080256822, + "loss": 1.5201, + "theoretical_loss": 3.5339505475528314, + "tokens_seen": 1416626176 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.001097711967304349, + "objective/train/docs_used": 803171, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3761231899261475, + "objective/train/original_loss": 3.3761231899261475, + "objective/train/theoretical_loss": 3.53381942338845, + "objective/train/tokens_used": 1437676000, + "objective/train/value_avg": -0.010894775390625, + "objective/train/value_loss": 0.003427887102589011, + "objective/train/value_max": -0.0004124641418457031, + "objective/train/value_min": -0.923828125, + "objective/train/value_reward_corr": 0.3498357679597873, + "objective/train/value_std": 0.024749755859375, + "objective/train/weight_avg": 1.000126600265503, + "objective/train/weighted_lm_loss": 3.3761231899261475, + "objective/train/weights_max": 1.059449315071106, + "objective/train/weights_min": 0.9134610295295715, + "theoretical_loss": 3.53381942338845, + "tokens_seen": 1417216000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005762439807383629, + "loss": 1.4953, + "theoretical_loss": 3.533717486210567, + "tokens_seen": 1417674752 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005759229534510434, + "loss": 1.5244, + "theoretical_loss": 3.5334846454137114, + "tokens_seen": 1418723328 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0038647851906716824, + "objective/train/docs_used": 804514, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.328984260559082, + "objective/train/original_loss": 2.328984260559082, + "objective/train/theoretical_loss": 3.533455555802692, + "objective/train/tokens_used": 1439314400, + "objective/train/value_avg": -0.01241302490234375, + "objective/train/value_loss": 0.0007219939725473523, + "objective/train/value_max": -0.0003514289855957031, + "objective/train/value_min": -0.2281494140625, + "objective/train/value_reward_corr": 0.3054291233971291, + "objective/train/value_std": 0.01070404052734375, + "objective/train/weight_avg": 1.0003900527954102, + "objective/train/weighted_lm_loss": 2.329988479614258, + "objective/train/weights_max": 1.0229687690734863, + "objective/train/weights_min": 0.9474850296974182, + "theoretical_loss": 3.533455555802692, + "tokens_seen": 1418854400 + }, + { + "epoch": 0.43, + "learning_rate": 0.000575601926163724, + "loss": 1.4886, + "theoretical_loss": 3.533252024790788, + "tokens_seen": 1419771904 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0016204208368435502, + "objective/train/docs_used": 804990, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.4591686725616455, + "objective/train/original_loss": 2.4591684341430664, + "objective/train/theoretical_loss": 3.5330922256380024, + "objective/train/tokens_used": 1440952800, + "objective/train/value_avg": -0.01971435546875, + "objective/train/value_loss": 0.0032882525119930506, + "objective/train/value_max": -0.00040459632873535156, + "objective/train/value_min": -0.9619140625, + "objective/train/value_reward_corr": 0.522660526449339, + "objective/train/value_std": 0.045135498046875, + "objective/train/weight_avg": 1.0001782178878784, + "objective/train/weighted_lm_loss": 2.4610257148742676, + "objective/train/weights_max": 1.0516459941864014, + "objective/train/weights_min": 0.9067788124084473, + "theoretical_loss": 3.5330922256380024, + "tokens_seen": 1420492800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005752808988764046, + "loss": 1.5127, + "theoretical_loss": 3.5330196239712217, + "tokens_seen": 1420820480 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005749598715890852, + "loss": 1.5418, + "theoretical_loss": 3.532787442585333, + "tokens_seen": 1421869056 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.002289314055815339, + "objective/train/docs_used": 806295, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4869470596313477, + "objective/train/original_loss": 3.4869470596313477, + "objective/train/theoretical_loss": 3.5327294314821365, + "objective/train/tokens_used": 1442591200, + "objective/train/value_avg": -0.01297760009765625, + "objective/train/value_loss": 0.001350079895928502, + "objective/train/value_max": -0.00029587745666503906, + "objective/train/value_min": -0.4072265625, + "objective/train/value_reward_corr": 0.6475983229562038, + "objective/train/value_std": 0.0226593017578125, + "objective/train/weight_avg": 1.000235676765442, + "objective/train/weighted_lm_loss": 3.4874160289764404, + "objective/train/weights_max": 1.0174399614334106, + "objective/train/weights_min": 0.9629401564598083, + "theoretical_loss": 3.5327294314821365, + "tokens_seen": 1422131200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005746388443017657, + "loss": 1.5105, + "theoretical_loss": 3.5325554802643375, + "tokens_seen": 1422917632 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.006135844625532627, + "objective/train/docs_used": 806909, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5259037017822266, + "objective/train/original_loss": 2.5259032249450684, + "objective/train/theoretical_loss": 3.532367171928181, + "objective/train/tokens_used": 1444229600, + "objective/train/value_avg": -0.01318359375, + "objective/train/value_loss": 0.0013163038529455662, + "objective/train/value_max": -0.0004673004150390625, + "objective/train/value_min": -0.92529296875, + "objective/train/value_reward_corr": 0.7181980865470139, + "objective/train/value_std": 0.0298004150390625, + "objective/train/weight_avg": 1.0006201267242432, + "objective/train/weighted_lm_loss": 2.527571678161621, + "objective/train/weights_max": 1.0368735790252686, + "objective/train/weights_min": 0.915955662727356, + "theoretical_loss": 3.532367171928181, + "tokens_seen": 1423769600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005743178170144462, + "loss": 1.5329, + "theoretical_loss": 3.5323237366403397, + "tokens_seen": 1423966208 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005739967897271268, + "loss": 1.498, + "theoretical_loss": 3.532092211346335, + "tokens_seen": 1425014784 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.003925240598618984, + "objective/train/docs_used": 808391, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9246060848236084, + "objective/train/original_loss": 2.9246060848236084, + "objective/train/theoretical_loss": 3.5320054455745304, + "objective/train/tokens_used": 1445868000, + "objective/train/value_avg": -0.01259613037109375, + "objective/train/value_loss": 0.0027043165173381567, + "objective/train/value_max": -0.0005550384521484375, + "objective/train/value_min": -0.6611328125, + "objective/train/value_reward_corr": 0.33319998371210124, + "objective/train/value_std": 0.0179290771484375, + "objective/train/weight_avg": 1.0004056692123413, + "objective/train/weighted_lm_loss": 2.9255475997924805, + "objective/train/weights_max": 1.0210399627685547, + "objective/train/weights_min": 0.9078383445739746, + "theoretical_loss": 3.5320054455745304, + "tokens_seen": 1425408000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005736757624398073, + "loss": 1.5203, + "theoretical_loss": 3.5318609040162015, + "tokens_seen": 1426063360 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.002390066860243678, + "objective/train/docs_used": 809012, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1527528762817383, + "objective/train/original_loss": 3.152752637863159, + "objective/train/theoretical_loss": 3.5316442510248587, + "objective/train/tokens_used": 1447506400, + "objective/train/value_avg": -0.0165252685546875, + "objective/train/value_loss": 0.004032614175230265, + "objective/train/value_max": -0.00051116943359375, + "objective/train/value_min": -0.728515625, + "objective/train/value_reward_corr": 0.40871364125481435, + "objective/train/value_std": 0.026031494140625, + "objective/train/weight_avg": 1.0002588033676147, + "objective/train/weighted_lm_loss": 3.1532278060913086, + "objective/train/weights_max": 1.0418545007705688, + "objective/train/weights_min": 0.9090387225151062, + "theoretical_loss": 3.5316442510248587, + "tokens_seen": 1427046400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005733547351524879, + "loss": 1.5034, + "theoretical_loss": 3.5316298142847016, + "tokens_seen": 1427111936 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005730337078651685, + "loss": 1.4993, + "theoretical_loss": 3.5313989417874763, + "tokens_seen": 1428160512 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0005836654454469681, + "objective/train/docs_used": 809683, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1849353313446045, + "objective/train/original_loss": 3.1849353313446045, + "objective/train/theoretical_loss": 3.5312835868880947, + "objective/train/tokens_used": 1449144800, + "objective/train/value_avg": -0.0168304443359375, + "objective/train/value_loss": 0.005284655373543501, + "objective/train/value_max": -0.00035691261291503906, + "objective/train/value_min": -0.95947265625, + "objective/train/value_reward_corr": 0.4153455447283345, + "objective/train/value_std": 0.037811279296875, + "objective/train/weight_avg": 1.0000845193862915, + "objective/train/weighted_lm_loss": 3.184934139251709, + "objective/train/weights_max": 1.0955814123153687, + "objective/train/weights_min": 0.9121441841125488, + "theoretical_loss": 3.5312835868880947, + "tokens_seen": 1428684800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005727126805778491, + "loss": 1.494, + "theoretical_loss": 3.5311682861610447, + "tokens_seen": 1429209088 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005723916532905296, + "loss": 1.5103, + "theoretical_loss": 3.5309378470427997, + "tokens_seen": 1430257664 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.003368968842551112, + "objective/train/docs_used": 810937, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.291231870651245, + "objective/train/original_loss": 3.291231155395508, + "objective/train/theoretical_loss": 3.5309234517783956, + "objective/train/tokens_used": 1450783200, + "objective/train/value_avg": -0.01446533203125, + "objective/train/value_loss": 0.004495619330555201, + "objective/train/value_max": -0.0003726482391357422, + "objective/train/value_min": -0.93359375, + "objective/train/value_reward_corr": 0.3608542989393528, + "objective/train/value_std": 0.034515380859375, + "objective/train/weight_avg": 1.0003589391708374, + "objective/train/weighted_lm_loss": 3.2920775413513184, + "objective/train/weights_max": 1.06431245803833, + "objective/train/weights_min": 0.9067843556404114, + "theoretical_loss": 3.5309234517783956, + "tokens_seen": 1430323200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005720706260032103, + "loss": 1.5459, + "theoretical_loss": 3.530707624071006, + "tokens_seen": 1431306240 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.011910401284694672, + "objective/train/docs_used": 811574, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9910295009613037, + "objective/train/original_loss": 2.991029739379883, + "objective/train/theoretical_loss": 3.530563844315122, + "objective/train/tokens_used": 1452421600, + "objective/train/value_avg": -0.0173492431640625, + "objective/train/value_loss": 0.0017770808190107346, + "objective/train/value_max": -0.0007014274597167969, + "objective/train/value_min": -0.394775390625, + "objective/train/value_reward_corr": 0.17724370810392084, + "objective/train/value_std": 0.022705078125, + "objective/train/weight_avg": 1.001199722290039, + "objective/train/weighted_lm_loss": 2.9939794540405273, + "objective/train/weights_max": 1.0275744199752808, + "objective/train/weights_min": 0.9089904427528381, + "theoretical_loss": 3.530563844315122, + "tokens_seen": 1431961600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005717495987158909, + "loss": 1.4972, + "theoretical_loss": 3.5304776168847964, + "tokens_seen": 1432354816 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005714285714285714, + "loss": 1.4769, + "theoretical_loss": 3.530247825124171, + "tokens_seen": 1433403392 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0025674165226519108, + "objective/train/docs_used": 812859, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.267925262451172, + "objective/train/original_loss": 3.2679250240325928, + "objective/train/theoretical_loss": 3.5302047631228124, + "objective/train/tokens_used": 1454060000, + "objective/train/value_avg": -0.0184783935546875, + "objective/train/value_loss": 0.006171493791043758, + "objective/train/value_max": -0.0004711151123046875, + "objective/train/value_min": -0.94580078125, + "objective/train/value_reward_corr": 0.5256981773581272, + "objective/train/value_std": 0.0467529296875, + "objective/train/weight_avg": 1.0002869367599487, + "objective/train/weighted_lm_loss": 3.2682783603668213, + "objective/train/weights_max": 1.0701717138290405, + "objective/train/weights_min": 0.9089650511741638, + "theoretical_loss": 3.5302047631228124, + "tokens_seen": 1433600000 + }, + { + "epoch": 0.43, + "learning_rate": 0.000571107544141252, + "loss": 1.5098, + "theoretical_loss": 3.530018248429992, + "tokens_seen": 1434451968 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.006667857524007559, + "objective/train/docs_used": 813625, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0692386627197266, + "objective/train/original_loss": 3.0692389011383057, + "objective/train/theoretical_loss": 3.5298462068311554, + "objective/train/tokens_used": 1455698400, + "objective/train/value_avg": -0.0157470703125, + "objective/train/value_loss": 0.003395750420168042, + "objective/train/value_max": -0.0006694793701171875, + "objective/train/value_min": -0.845703125, + "objective/train/value_reward_corr": 0.25888982113041537, + "objective/train/value_std": 0.0301055908203125, + "objective/train/weight_avg": 1.0006834268569946, + "objective/train/weighted_lm_loss": 3.0715296268463135, + "objective/train/weights_max": 1.054535984992981, + "objective/train/weights_min": 0.912117600440979, + "theoretical_loss": 3.5298462068311554, + "tokens_seen": 1435238400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005707865168539326, + "loss": 1.494, + "theoretical_loss": 3.529788886443983, + "tokens_seen": 1435500544 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005704654895666132, + "loss": 1.4861, + "theoretical_loss": 3.529559738808726, + "tokens_seen": 1436549120 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.004600708372890949, + "objective/train/docs_used": 814744, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.21927547454834, + "objective/train/original_loss": 3.21927547454834, + "objective/train/theoretical_loss": 3.5294881740749693, + "objective/train/tokens_used": 1457336800, + "objective/train/value_avg": -0.015716552734375, + "objective/train/value_loss": 0.003311208914965391, + "objective/train/value_max": -0.0007643699645996094, + "objective/train/value_min": -0.564453125, + "objective/train/value_reward_corr": 0.2500714815198366, + "objective/train/value_std": 0.0207977294921875, + "objective/train/weight_avg": 1.000476360321045, + "objective/train/weighted_lm_loss": 3.2203257083892822, + "objective/train/weights_max": 1.0579532384872437, + "objective/train/weights_min": 0.9155732989311218, + "theoretical_loss": 3.5294881740749693, + "tokens_seen": 1436876800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005701444622792937, + "loss": 1.4935, + "theoretical_loss": 3.5293308051676573, + "tokens_seen": 1437597696 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.005572533700615168, + "objective/train/docs_used": 815309, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.968520164489746, + "objective/train/original_loss": 2.968520164489746, + "objective/train/theoretical_loss": 3.5291306634941737, + "objective/train/tokens_used": 1458975200, + "objective/train/value_avg": -0.0172576904296875, + "objective/train/value_loss": 0.0026053818874061108, + "objective/train/value_max": -0.0005617141723632812, + "objective/train/value_min": -0.9013671875, + "objective/train/value_reward_corr": 0.47422721735250034, + "objective/train/value_std": 0.0306243896484375, + "objective/train/weight_avg": 1.0005701780319214, + "objective/train/weighted_lm_loss": 2.9701297283172607, + "objective/train/weights_max": 1.043620228767395, + "objective/train/weights_min": 0.9121832251548767, + "theoretical_loss": 3.5291306634941737, + "tokens_seen": 1438515200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005698234349919743, + "loss": 1.5088, + "theoretical_loss": 3.529102085165067, + "tokens_seen": 1438646272 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005695024077046549, + "loss": 1.49, + "theoretical_loss": 3.5288735784460936, + "tokens_seen": 1439694848 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.001611404470168054, + "objective/train/docs_used": 816515, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0795540809631348, + "objective/train/original_loss": 3.079554796218872, + "objective/train/theoretical_loss": 3.528773673733764, + "objective/train/tokens_used": 1460613600, + "objective/train/value_avg": -0.021270751953125, + "objective/train/value_loss": 0.0022379590664058924, + "objective/train/value_max": -0.0004858970642089844, + "objective/train/value_min": -0.359619140625, + "objective/train/value_reward_corr": 0.6626632241235458, + "objective/train/value_std": 0.0335693359375, + "objective/train/weight_avg": 1.0001722574234009, + "objective/train/weighted_lm_loss": 3.079437017440796, + "objective/train/weights_max": 1.021704077720642, + "objective/train/weights_min": 0.9171841144561768, + "theoretical_loss": 3.528773673733764, + "tokens_seen": 1440153600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005691813804173354, + "loss": 1.4788, + "theoretical_loss": 3.5286452846567244, + "tokens_seen": 1440743424 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0011687460355460644, + "objective/train/docs_used": 817059, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.111053228378296, + "objective/train/original_loss": 3.111053228378296, + "objective/train/theoretical_loss": 3.528417203443791, + "objective/train/tokens_used": 1462252000, + "objective/train/value_avg": -0.03619384765625, + "objective/train/value_loss": 0.0050958869978785515, + "objective/train/value_max": -0.000614166259765625, + "objective/train/value_min": -0.767578125, + "objective/train/value_reward_corr": 0.7681003332133484, + "objective/train/value_std": 0.07757568359375, + "objective/train/weight_avg": 1.000142216682434, + "objective/train/weighted_lm_loss": 3.1125285625457764, + "objective/train/weights_max": 1.0428510904312134, + "objective/train/weights_min": 0.9169694185256958, + "theoretical_loss": 3.528417203443791, + "tokens_seen": 1441792000 + }, + { + "epoch": 0.44, + "learning_rate": 0.000568860353130016, + "loss": 1.5093, + "theoretical_loss": 3.528417203443791, + "tokens_seen": 1441792000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005685393258426966, + "loss": 1.5218, + "theoretical_loss": 3.528189334454967, + "tokens_seen": 1442840576 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": -0.015067930333316326, + "objective/train/docs_used": 818508, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.027256488800049, + "objective/train/original_loss": 3.027256965637207, + "objective/train/theoretical_loss": 3.528061251279331, + "objective/train/tokens_used": 1463890400, + "objective/train/value_avg": -0.021026611328125, + "objective/train/value_loss": 0.015469793230295181, + "objective/train/value_max": -0.000469207763671875, + "objective/train/value_min": -0.984375, + "objective/train/value_reward_corr": 0.392220464841516, + "objective/train/value_std": 0.041900634765625, + "objective/train/weight_avg": 0.9985688328742981, + "objective/train/weighted_lm_loss": 3.023801565170288, + "objective/train/weights_max": 1.08674156665802, + "objective/train/weights_min": 0.9072178602218628, + "theoretical_loss": 3.528061251279331, + "tokens_seen": 1443430400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005682182985553773, + "loss": 1.5314, + "theoretical_loss": 3.527961677338765, + "tokens_seen": 1443889152 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005678972712680578, + "loss": 1.5318, + "theoretical_loss": 3.5277342317445353, + "tokens_seen": 1444937728 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.004605838097631931, + "objective/train/docs_used": 819330, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.887521505355835, + "objective/train/original_loss": 2.887521505355835, + "objective/train/theoretical_loss": 3.5277058159004637, + "objective/train/tokens_used": 1465528800, + "objective/train/value_avg": -0.017913818359375, + "objective/train/value_loss": 0.002171439118683338, + "objective/train/value_max": -0.0003459453582763672, + "objective/train/value_min": -0.60986328125, + "objective/train/value_reward_corr": 0.2736197544521719, + "objective/train/value_std": 0.022125244140625, + "objective/train/weight_avg": 1.0004713535308838, + "objective/train/weighted_lm_loss": 2.8896055221557617, + "objective/train/weights_max": 1.0435222387313843, + "objective/train/weights_min": 0.9257031679153442, + "theoretical_loss": 3.5277058159004637, + "tokens_seen": 1445068800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005675762439807384, + "loss": 1.5345, + "theoretical_loss": 3.5275069973224618, + "tokens_seen": 1445986304 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.006480723153799772, + "objective/train/docs_used": 820636, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.946068048477173, + "objective/train/original_loss": 2.9460675716400146, + "objective/train/theoretical_loss": 3.5273508959722504, + "objective/train/tokens_used": 1467167200, + "objective/train/value_avg": -0.04010009765625, + "objective/train/value_loss": 0.005076427944004536, + "objective/train/value_max": -0.0005974769592285156, + "objective/train/value_min": -0.87060546875, + "objective/train/value_reward_corr": 0.711412525814656, + "objective/train/value_std": 0.07293701171875, + "objective/train/weight_avg": 1.0006732940673828, + "objective/train/weighted_lm_loss": 2.9476418495178223, + "objective/train/weights_max": 1.0735551118850708, + "objective/train/weights_min": 0.9154947400093079, + "theoretical_loss": 3.5273508959722504, + "tokens_seen": 1446707200 + }, + { + "epoch": 0.44, + "learning_rate": 0.000567255216693419, + "loss": 1.518, + "theoretical_loss": 3.527279973723561, + "tokens_seen": 1447034880 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005669341894060996, + "loss": 1.5419, + "theoretical_loss": 3.5270531605996767, + "tokens_seen": 1448083456 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.004927781876176596, + "objective/train/docs_used": 821297, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4209160804748535, + "objective/train/original_loss": 3.4209160804748535, + "objective/train/theoretical_loss": 3.5269964901647066, + "objective/train/tokens_used": 1468805600, + "objective/train/value_avg": -0.01462554931640625, + "objective/train/value_loss": 0.0029105753637850285, + "objective/train/value_max": -0.000415802001953125, + "objective/train/value_min": -0.57421875, + "objective/train/value_reward_corr": 0.34985129570490103, + "objective/train/value_std": 0.02294921875, + "objective/train/weight_avg": 1.000507116317749, + "objective/train/weighted_lm_loss": 3.4222123622894287, + "objective/train/weights_max": 1.033901572227478, + "objective/train/weights_min": 0.9068019986152649, + "theoretical_loss": 3.5269964901647066, + "tokens_seen": 1448345600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005666131621187801, + "loss": 1.5205, + "theoretical_loss": 3.5268265576034805, + "tokens_seen": 1449132032 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.002743417164310813, + "objective/train/docs_used": 822517, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.748568058013916, + "objective/train/original_loss": 2.748568058013916, + "objective/train/theoretical_loss": 3.526642597152777, + "objective/train/tokens_used": 1470444000, + "objective/train/value_avg": -0.0308990478515625, + "objective/train/value_loss": 0.007420269772410393, + "objective/train/value_max": -0.0007791519165039062, + "objective/train/value_min": -0.728515625, + "objective/train/value_reward_corr": 0.4251670245307123, + "objective/train/value_std": 0.054595947265625, + "objective/train/weight_avg": 1.000311017036438, + "objective/train/weighted_lm_loss": 2.749098300933838, + "objective/train/weights_max": 1.056449294090271, + "objective/train/weights_min": 0.9147674441337585, + "theoretical_loss": 3.526642597152777, + "tokens_seen": 1449984000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005662921348314607, + "loss": 1.484, + "theoretical_loss": 3.5266001643884684, + "tokens_seen": 1450180608 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005659711075441413, + "loss": 1.5, + "theoretical_loss": 3.526373980608957, + "tokens_seen": 1451229184 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.00498966034501791, + "objective/train/docs_used": 823297, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.928382635116577, + "objective/train/original_loss": 2.928382158279419, + "objective/train/theoretical_loss": 3.526289215616317, + "objective/train/tokens_used": 1472082400, + "objective/train/value_avg": -0.009796142578125, + "objective/train/value_loss": 0.0009486671769991517, + "objective/train/value_max": -0.0003845691680908203, + "objective/train/value_min": -0.65478515625, + "objective/train/value_reward_corr": 0.4043989173747344, + "objective/train/value_std": 0.0154266357421875, + "objective/train/weight_avg": 1.000503659248352, + "objective/train/weighted_lm_loss": 2.929530382156372, + "objective/train/weights_max": 1.049062967300415, + "objective/train/weights_min": 0.9110416769981384, + "theoretical_loss": 3.526289215616317, + "tokens_seen": 1451622400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005656500802568218, + "loss": 1.5578, + "theoretical_loss": 3.5261480059200814, + "tokens_seen": 1452277760 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": -0.025028757750988007, + "objective/train/docs_used": 824661, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.347136974334717, + "objective/train/original_loss": 3.347136974334717, + "objective/train/theoretical_loss": 3.525936344240063, + "objective/train/tokens_used": 1473720800, + "objective/train/value_avg": -0.044281005859375, + "objective/train/value_loss": 0.01594112440943718, + "objective/train/value_max": -0.0005335807800292969, + "objective/train/value_min": -0.802734375, + "objective/train/value_reward_corr": 0.7914687909012663, + "objective/train/value_std": 0.08392333984375, + "objective/train/weight_avg": 0.9975757598876953, + "objective/train/weighted_lm_loss": 3.343456745147705, + "objective/train/weights_max": 1.0547511577606201, + "objective/train/weights_min": 0.9218704104423523, + "theoretical_loss": 3.525936344240063, + "tokens_seen": 1453260800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005653290529695024, + "loss": 1.5166, + "theoretical_loss": 3.5259222399777945, + "tokens_seen": 1453326336 + }, + { + "epoch": 0.44, + "learning_rate": 0.000565008025682183, + "loss": 1.5227, + "theoretical_loss": 3.525696682438861, + "tokens_seen": 1454374912 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.004010834731161594, + "objective/train/docs_used": 824875, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.07706880569458, + "objective/train/original_loss": 3.077069044113159, + "objective/train/theoretical_loss": 3.525583981713613, + "objective/train/tokens_used": 1475359200, + "objective/train/value_avg": -0.0122528076171875, + "objective/train/value_loss": 0.0009178869659081101, + "objective/train/value_max": -0.00064849853515625, + "objective/train/value_min": -0.47705078125, + "objective/train/value_reward_corr": 0.41454463614538084, + "objective/train/value_std": 0.0152435302734375, + "objective/train/weight_avg": 1.0004056692123413, + "objective/train/weighted_lm_loss": 3.078341245651245, + "objective/train/weights_max": 1.0408152341842651, + "objective/train/weights_min": 0.9604073166847229, + "theoretical_loss": 3.525583981713613, + "tokens_seen": 1454899200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005646869983948636, + "loss": 1.5171, + "theoretical_loss": 3.5254713329608585, + "tokens_seen": 1455423488 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005643659711075441, + "loss": 1.5477, + "theoretical_loss": 3.5252461912021733, + "tokens_seen": 1456472064 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0013862882042303681, + "objective/train/docs_used": 826174, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1443207263946533, + "objective/train/original_loss": 3.1443207263946533, + "objective/train/theoretical_loss": 3.525232126731402, + "objective/train/tokens_used": 1476997600, + "objective/train/value_avg": -0.039398193359375, + "objective/train/value_loss": 0.009170688688755035, + "objective/train/value_max": -0.0005335807800292969, + "objective/train/value_min": -0.994140625, + "objective/train/value_reward_corr": 0.7179226309086841, + "objective/train/value_std": 0.09283447265625, + "objective/train/weight_avg": 1.0001840591430664, + "objective/train/weighted_lm_loss": 3.143828868865967, + "objective/train/weights_max": 1.0861717462539673, + "objective/train/weights_min": 0.91407710313797, + "theoretical_loss": 3.525232126731402, + "tokens_seen": 1456537600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005640449438202248, + "loss": 1.5412, + "theoretical_loss": 3.525021256821997, + "tokens_seen": 1457520640 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": -0.004523031879216433, + "objective/train/docs_used": 826810, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.26664400100708, + "objective/train/original_loss": 3.26664400100708, + "objective/train/theoretical_loss": 3.524880777992677, + "objective/train/tokens_used": 1478636000, + "objective/train/value_avg": -0.01279449462890625, + "objective/train/value_loss": 0.002606441732496023, + "objective/train/value_max": -0.0002472400665283203, + "objective/train/value_min": -0.2509765625, + "objective/train/value_reward_corr": 0.5029843444804535, + "objective/train/value_std": 0.0166168212890625, + "objective/train/weight_avg": 0.9995605945587158, + "objective/train/weighted_lm_loss": 3.265850067138672, + "objective/train/weights_max": 1.0249580144882202, + "objective/train/weights_min": 0.9307553172111511, + "theoretical_loss": 3.524880777992677, + "tokens_seen": 1458176000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005637239165329054, + "loss": 1.5091, + "theoretical_loss": 3.5247965294803265, + "tokens_seen": 1458569216 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005634028892455859, + "loss": 1.5539, + "theoretical_loss": 3.524572008837958, + "tokens_seen": 1459617792 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.005809729918837547, + "objective/train/docs_used": 827996, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8091955184936523, + "objective/train/original_loss": 2.8091952800750732, + "objective/train/theoretical_loss": 3.5245299342014764, + "objective/train/tokens_used": 1480274400, + "objective/train/value_avg": -0.01261138916015625, + "objective/train/value_loss": 0.00034657641663216054, + "objective/train/value_max": -0.00030541419982910156, + "objective/train/value_min": -0.173828125, + "objective/train/value_reward_corr": 0.6565022230076082, + "objective/train/value_std": 0.016571044921875, + "objective/train/weight_avg": 1.0005826950073242, + "objective/train/weighted_lm_loss": 2.8111050128936768, + "objective/train/weights_max": 1.0110852718353271, + "objective/train/weights_min": 0.9392841458320618, + "theoretical_loss": 3.5245299342014764, + "tokens_seen": 1459814400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005630818619582665, + "loss": 1.5689, + "theoretical_loss": 3.5243476945564893, + "tokens_seen": 1460666368 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.00513783423230052, + "objective/train/docs_used": 828576, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.771841049194336, + "objective/train/original_loss": 2.771840810775757, + "objective/train/theoretical_loss": 3.524179594066606, + "objective/train/tokens_used": 1481912800, + "objective/train/value_avg": -0.0099945068359375, + "objective/train/value_loss": 0.0009272077586501837, + "objective/train/value_max": -0.00042557716369628906, + "objective/train/value_min": -0.181884765625, + "objective/train/value_reward_corr": 0.18449262457085244, + "objective/train/value_std": 0.00980377197265625, + "objective/train/weight_avg": 1.0005183219909668, + "objective/train/weighted_lm_loss": 2.7728700637817383, + "objective/train/weights_max": 1.0149998664855957, + "objective/train/weights_min": 0.946591854095459, + "theoretical_loss": 3.524179594066606, + "tokens_seen": 1461452800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005627608346709471, + "loss": 1.5212, + "theoretical_loss": 3.524123586298312, + "tokens_seen": 1461714944 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005624398073836277, + "loss": 1.5411, + "theoretical_loss": 3.5238996837266137, + "tokens_seen": 1462763520 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": -0.005205780733376741, + "objective/train/docs_used": 830024, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5801033973693848, + "objective/train/original_loss": 3.5801031589508057, + "objective/train/theoretical_loss": 3.523829756301615, + "objective/train/tokens_used": 1483551200, + "objective/train/value_avg": -0.0273284912109375, + "objective/train/value_loss": 0.012484314851462841, + "objective/train/value_max": -0.000415802001953125, + "objective/train/value_min": -0.94287109375, + "objective/train/value_reward_corr": 0.6194700124451102, + "objective/train/value_std": 0.06475830078125, + "objective/train/weight_avg": 0.9995405077934265, + "objective/train/weighted_lm_loss": 3.5779366493225098, + "objective/train/weights_max": 1.0649917125701904, + "objective/train/weights_min": 0.9068158864974976, + "theoretical_loss": 3.523829756301615, + "tokens_seen": 1463091200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005621187800963082, + "loss": 1.5232, + "theoretical_loss": 3.5236759865053724, + "tokens_seen": 1463812096 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0036785255651921034, + "objective/train/docs_used": 830651, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8133726119995117, + "objective/train/original_loss": 2.813372850418091, + "objective/train/theoretical_loss": 3.5234804196247764, + "objective/train/tokens_used": 1485189600, + "objective/train/value_avg": -0.0166473388671875, + "objective/train/value_loss": 0.0020494977943599224, + "objective/train/value_max": -0.0002472400665283203, + "objective/train/value_min": -0.91259765625, + "objective/train/value_reward_corr": 0.6114141215658354, + "objective/train/value_std": 0.0430908203125, + "objective/train/weight_avg": 1.0003780126571655, + "objective/train/weighted_lm_loss": 2.8145523071289062, + "objective/train/weights_max": 1.0918792486190796, + "objective/train/weights_min": 0.9086295962333679, + "theoretical_loss": 3.5234804196247764, + "tokens_seen": 1464729600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005617977528089888, + "loss": 1.5458, + "theoretical_loss": 3.523452494299356, + "tokens_seen": 1464860672 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005614767255216694, + "loss": 1.5242, + "theoretical_loss": 3.5232292067741176, + "tokens_seen": 1465909248 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.007077884394675493, + "objective/train/docs_used": 831840, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.2873592376708984, + "objective/train/original_loss": 2.2873589992523193, + "objective/train/theoretical_loss": 3.5231315827590604, + "objective/train/tokens_used": 1486828000, + "objective/train/value_avg": -0.0137939453125, + "objective/train/value_loss": 0.0005488009192049503, + "objective/train/value_max": -0.00030541419982910156, + "objective/train/value_min": -0.2919921875, + "objective/train/value_reward_corr": 0.40030074671986626, + "objective/train/value_std": 0.019012451171875, + "objective/train/weight_avg": 1.0007104873657227, + "objective/train/weighted_lm_loss": 2.2890195846557617, + "objective/train/weights_max": 1.0292730331420898, + "objective/train/weights_min": 0.9299592971801758, + "theoretical_loss": 3.5231315827590604, + "tokens_seen": 1466368000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005611556982343499, + "loss": 1.5195, + "theoretical_loss": 3.523006123595997, + "tokens_seen": 1466957824 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.003214449854567647, + "objective/train/docs_used": 832610, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.532829761505127, + "objective/train/original_loss": 3.5328292846679688, + "objective/train/theoretical_loss": 3.522783244432115, + "objective/train/tokens_used": 1488466400, + "objective/train/value_avg": -0.025421142578125, + "objective/train/value_loss": 0.006413314491510391, + "objective/train/value_max": -0.00037860870361328125, + "objective/train/value_min": -0.9423828125, + "objective/train/value_reward_corr": 0.42863161085475654, + "objective/train/value_std": 0.053436279296875, + "objective/train/weight_avg": 1.000353217124939, + "objective/train/weighted_lm_loss": 3.5324623584747314, + "objective/train/weights_max": 1.0819728374481201, + "objective/train/weights_min": 0.9242215156555176, + "theoretical_loss": 3.522783244432115, + "tokens_seen": 1468006400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005608346709470305, + "loss": 1.5042, + "theoretical_loss": 3.522783244432115, + "tokens_seen": 1468006400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005605136436597111, + "loss": 1.5155, + "theoretical_loss": 3.52256056895037, + "tokens_seen": 1469054976 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.0031399629078805447, + "objective/train/docs_used": 834187, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.197659969329834, + "objective/train/original_loss": 3.197660446166992, + "objective/train/theoretical_loss": 3.5224354033762415, + "objective/train/tokens_used": 1490104800, + "objective/train/value_avg": -0.022552490234375, + "objective/train/value_loss": 0.005521583370864391, + "objective/train/value_max": -0.0004992485046386719, + "objective/train/value_min": -0.62060546875, + "objective/train/value_reward_corr": 0.4487617859775527, + "objective/train/value_std": 0.033538818359375, + "objective/train/weight_avg": 1.0003411769866943, + "objective/train/weighted_lm_loss": 3.198235034942627, + "objective/train/weights_max": 1.0512478351593018, + "objective/train/weights_min": 0.9116918444633484, + "theoretical_loss": 3.5224354033762415, + "tokens_seen": 1469644800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005601926163723918, + "loss": 1.5161, + "theoretical_loss": 3.5223380968194404, + "tokens_seen": 1470103552 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005598715890850723, + "loss": 1.5322, + "theoretical_loss": 3.522115827708778, + "tokens_seen": 1471152128 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.00540812686085701, + "objective/train/docs_used": 834912, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7791199684143066, + "objective/train/original_loss": 2.7791202068328857, + "objective/train/theoretical_loss": 3.522088058328375, + "objective/train/tokens_used": 1491743200, + "objective/train/value_avg": -0.0252532958984375, + "objective/train/value_loss": 0.001859787618741393, + "objective/train/value_max": -0.0004305839538574219, + "objective/train/value_min": -0.7802734375, + "objective/train/value_reward_corr": 0.7322946926908469, + "objective/train/value_std": 0.04937744140625, + "objective/train/weight_avg": 1.0005501508712769, + "objective/train/weighted_lm_loss": 2.7805416584014893, + "objective/train/weights_max": 1.0295873880386353, + "objective/train/weights_min": 0.9460660219192505, + "theoretical_loss": 3.522088058328375, + "tokens_seen": 1471283200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005595505617977528, + "loss": 1.5018, + "theoretical_loss": 3.5218937612886068, + "tokens_seen": 1472200704 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.00123031425755471, + "objective/train/docs_used": 835528, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1356630325317383, + "objective/train/original_loss": 3.1356630325317383, + "objective/train/theoretical_loss": 3.5217412080300594, + "objective/train/tokens_used": 1493381600, + "objective/train/value_avg": -0.016571044921875, + "objective/train/value_loss": 0.004726823884993792, + "objective/train/value_max": -0.00031256675720214844, + "objective/train/value_min": -0.81201171875, + "objective/train/value_reward_corr": 0.6517651077715062, + "objective/train/value_std": 0.03839111328125, + "objective/train/weight_avg": 1.0001462697982788, + "objective/train/weighted_lm_loss": 3.135514497756958, + "objective/train/weights_max": 1.0388514995574951, + "objective/train/weights_min": 0.9155639410018921, + "theoretical_loss": 3.5217412080300594, + "tokens_seen": 1472921600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005592295345104334, + "loss": 1.4852, + "theoretical_loss": 3.521671897229922, + "tokens_seen": 1473249280 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005589085072231139, + "loss": 1.4874, + "theoretical_loss": 3.521450235204485, + "tokens_seen": 1474297856 + }, + { + "debugging/Self-BLEU-5": 0.3534733586091342, + "debugging/distinct-1-grams": 0.736222308537691, + "debugging/distinct-2-grams": 0.9376650413188702, + "debugging/entropy-1-grams": 5.598149618237377, + "debugging/entropy-2-grams": 6.390827090690173, + "debugging/length": 519.2222222222222, + "debugging/num_segments": 9, + "debugging/raw_token_scores_avg": 0.014374006539583206, + "debugging/raw_token_scores_std": 0.06375058740377426, + "epoch": 0.45, + "objective/train/advantage_avg": 0.0036347322165966034, + "objective/train/docs_used": 836610, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.22320556640625, + "objective/train/original_loss": 3.223205804824829, + "objective/train/theoretical_loss": 3.521394851227428, + "objective/train/tokens_used": 1495020000, + "objective/train/value_avg": -0.0180206298828125, + "objective/train/value_loss": 0.0025264944415539503, + "objective/train/value_max": -0.0004820823669433594, + "objective/train/value_min": -0.3916015625, + "objective/train/value_reward_corr": 0.6184023329096957, + "objective/train/value_std": 0.038360595703125, + "objective/train/weight_avg": 1.0003759860992432, + "objective/train/weighted_lm_loss": 3.2243335247039795, + "objective/train/weights_max": 1.0284420251846313, + "objective/train/weights_min": 0.9187361598014832, + "theoretical_loss": 3.521394851227428, + "tokens_seen": 1474560000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005585874799357945, + "loss": 1.4843, + "theoretical_loss": 3.521228774884823, + "tokens_seen": 1475346432 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.00418385025113821, + "objective/train/docs_used": 837052, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.882986307144165, + "objective/train/original_loss": 2.882986068725586, + "objective/train/theoretical_loss": 3.521048986671179, + "objective/train/tokens_used": 1496658400, + "objective/train/value_avg": -0.0272979736328125, + "objective/train/value_loss": 0.004615968558937311, + "objective/train/value_max": -0.0005526542663574219, + "objective/train/value_min": -0.72314453125, + "objective/train/value_reward_corr": 0.40053923998632335, + "objective/train/value_std": 0.04559326171875, + "objective/train/weight_avg": 1.0004411935806274, + "objective/train/weighted_lm_loss": 2.8850419521331787, + "objective/train/weights_max": 1.0748811960220337, + "objective/train/weights_min": 0.9081478714942932, + "theoretical_loss": 3.521048986671179, + "tokens_seen": 1476198400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005582664526484751, + "loss": 1.4591, + "theoretical_loss": 3.521007515944228, + "tokens_seen": 1476395008 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005579454253611557, + "loss": 1.5221, + "theoretical_loss": 3.52078645805675, + "tokens_seen": 1477443584 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.0078251538798213, + "objective/train/docs_used": 837925, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1244192123413086, + "objective/train/original_loss": 3.1244194507598877, + "objective/train/theoretical_loss": 3.5207036131165568, + "objective/train/tokens_used": 1498296800, + "objective/train/value_avg": -0.010894775390625, + "objective/train/value_loss": 0.0011438127839937806, + "objective/train/value_max": -0.0004916191101074219, + "objective/train/value_min": -0.97119140625, + "objective/train/value_reward_corr": 0.4209836812459092, + "objective/train/value_std": 0.0179901123046875, + "objective/train/weight_avg": 1.0007880926132202, + "objective/train/weighted_lm_loss": 3.126220703125, + "objective/train/weights_max": 1.0291389226913452, + "objective/train/weights_min": 0.9061315655708313, + "theoretical_loss": 3.5207036131165568, + "tokens_seen": 1477836800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005576243980738362, + "loss": 1.5447, + "theoretical_loss": 3.5205656008972, + "tokens_seen": 1478492160 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": -0.003810022259131074, + "objective/train/docs_used": 838738, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.299252986907959, + "objective/train/original_loss": 3.299253225326538, + "objective/train/theoretical_loss": 3.520358729323328, + "objective/train/tokens_used": 1499935200, + "objective/train/value_avg": -0.0302276611328125, + "objective/train/value_loss": 0.006943099666386843, + "objective/train/value_max": -0.0004973411560058594, + "objective/train/value_min": -0.6884765625, + "objective/train/value_reward_corr": 0.6465669547208435, + "objective/train/value_std": 0.0565185546875, + "objective/train/weight_avg": 0.9996533393859863, + "objective/train/weighted_lm_loss": 3.298121690750122, + "objective/train/weights_max": 1.0581399202346802, + "objective/train/weights_min": 0.9247604608535767, + "theoretical_loss": 3.520358729323328, + "tokens_seen": 1479475200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005573033707865168, + "loss": 1.5391, + "theoretical_loss": 3.5203449441411423, + "tokens_seen": 1479540736 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005569823434991974, + "loss": 1.5024, + "theoretical_loss": 3.5201244874648983, + "tokens_seen": 1480589312 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.008279941976070404, + "objective/train/docs_used": 839976, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9071156978607178, + "objective/train/original_loss": 2.907115936279297, + "objective/train/theoretical_loss": 3.5200143340557615, + "objective/train/tokens_used": 1501573600, + "objective/train/value_avg": -0.0157928466796875, + "objective/train/value_loss": 0.00046275000204332173, + "objective/train/value_max": -0.0006666183471679688, + "objective/train/value_min": -0.363525390625, + "objective/train/value_reward_corr": 0.43998042096034584, + "objective/train/value_std": 0.017364501953125, + "objective/train/weight_avg": 1.0008302927017212, + "objective/train/weighted_lm_loss": 2.90987491607666, + "objective/train/weights_max": 1.0368479490280151, + "objective/train/weights_min": 0.9882150888442993, + "theoretical_loss": 3.5200143340557615, + "tokens_seen": 1481113600 + }, + { + "epoch": 0.45, + "learning_rate": 0.000556661316211878, + "loss": 1.546, + "theoretical_loss": 3.519904230545538, + "tokens_seen": 1481637888 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005563402889245585, + "loss": 1.5024, + "theoretical_loss": 3.5196841730608828, + "tokens_seen": 1482686464 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.00033205817453563213, + "objective/train/docs_used": 840589, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.809488296508789, + "objective/train/original_loss": 2.809488534927368, + "objective/train/theoretical_loss": 3.519670426082606, + "objective/train/tokens_used": 1503212000, + "objective/train/value_avg": -0.0133056640625, + "objective/train/value_loss": 0.004194103181362152, + "objective/train/value_max": -0.0003712177276611328, + "objective/train/value_min": -0.92724609375, + "objective/train/value_reward_corr": 0.45653856500255174, + "objective/train/value_std": 0.0283355712890625, + "objective/train/weight_avg": 1.0000537633895874, + "objective/train/weighted_lm_loss": 2.809734582901001, + "objective/train/weights_max": 1.0360690355300903, + "objective/train/weights_min": 0.9108169078826904, + "theoretical_loss": 3.519670426082606, + "tokens_seen": 1482752000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005560192616372392, + "loss": 1.5115, + "theoretical_loss": 3.5194643146895, + "tokens_seen": 1483735040 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.006741808261722326, + "objective/train/docs_used": 841910, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.249469518661499, + "objective/train/original_loss": 3.249469518661499, + "objective/train/theoretical_loss": 3.5193270041770703, + "objective/train/tokens_used": 1504850400, + "objective/train/value_avg": -0.0384521484375, + "objective/train/value_loss": 0.00976258423179388, + "objective/train/value_max": -0.0006046295166015625, + "objective/train/value_min": -0.8203125, + "objective/train/value_reward_corr": 0.4986271271989866, + "objective/train/value_std": 0.06304931640625, + "objective/train/weight_avg": 1.0007222890853882, + "objective/train/weighted_lm_loss": 3.2502992153167725, + "objective/train/weights_max": 1.032426118850708, + "objective/train/weights_min": 0.9085381627082825, + "theoretical_loss": 3.5193270041770703, + "tokens_seen": 1484390400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005556982343499198, + "loss": 1.5131, + "theoretical_loss": 3.5192446551107017, + "tokens_seen": 1484783616 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005553772070626003, + "loss": 1.4714, + "theoretical_loss": 3.519025194004543, + "tokens_seen": 1485832192 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.0067757414653897285, + "objective/train/docs_used": 842517, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8281490802764893, + "objective/train/original_loss": 2.82814884185791, + "objective/train/theoretical_loss": 3.5189840671168007, + "objective/train/tokens_used": 1506488800, + "objective/train/value_avg": -0.028839111328125, + "objective/train/value_loss": 0.002693325746804476, + "objective/train/value_max": -0.0002359151840209961, + "objective/train/value_min": -0.734375, + "objective/train/value_reward_corr": 0.49457742406130156, + "objective/train/value_std": 0.041595458984375, + "objective/train/weight_avg": 1.0006909370422363, + "objective/train/weighted_lm_loss": 2.829892873764038, + "objective/train/weights_max": 1.0430673360824585, + "objective/train/weights_min": 0.9137859344482422, + "theoretical_loss": 3.5189840671168007, + "tokens_seen": 1486028800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005550561797752809, + "loss": 1.5009, + "theoretical_loss": 3.518805931051819, + "tokens_seen": 1486880768 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": -0.010054057464003563, + "objective/train/docs_used": 843832, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.048976182937622, + "objective/train/original_loss": 3.048975944519043, + "objective/train/theoretical_loss": 3.518641613683862, + "objective/train/tokens_used": 1508127200, + "objective/train/value_avg": -0.0258941650390625, + "objective/train/value_loss": 0.00711037777364254, + "objective/train/value_max": -0.00031757354736328125, + "objective/train/value_min": -0.779296875, + "objective/train/value_reward_corr": 0.7483254003564341, + "objective/train/value_std": 0.060546875, + "objective/train/weight_avg": 0.9990295171737671, + "objective/train/weighted_lm_loss": 3.0448970794677734, + "objective/train/weights_max": 1.0378673076629639, + "objective/train/weights_min": 0.912562906742096, + "theoretical_loss": 3.518641613683862, + "tokens_seen": 1487667200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005547351524879615, + "loss": 1.474, + "theoretical_loss": 3.5185868659340627, + "tokens_seen": 1487929344 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005544141252006421, + "loss": 1.5032, + "theoretical_loss": 3.518367998333543, + "tokens_seen": 1488977920 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.005168944597244263, + "objective/train/docs_used": 844519, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.912886381149292, + "objective/train/original_loss": 2.912886142730713, + "objective/train/theoretical_loss": 3.5182996426647137, + "objective/train/tokens_used": 1509765600, + "objective/train/value_avg": -0.014892578125, + "objective/train/value_loss": 0.00125686835963279, + "objective/train/value_max": -0.0004954338073730469, + "objective/train/value_min": -0.52587890625, + "objective/train/value_reward_corr": 0.48813610500239746, + "objective/train/value_std": 0.022613525390625, + "objective/train/weight_avg": 1.0005230903625488, + "objective/train/weighted_lm_loss": 2.9143662452697754, + "objective/train/weights_max": 1.035856008529663, + "objective/train/weights_min": 0.9119735956192017, + "theoretical_loss": 3.5182996426647137, + "tokens_seen": 1489305600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005540930979133226, + "loss": 1.5093, + "theoretical_loss": 3.518149327933262, + "tokens_seen": 1490026496 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.004600292071700096, + "objective/train/docs_used": 845874, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.630387306213379, + "objective/train/original_loss": 2.630387544631958, + "objective/train/theoretical_loss": 3.517958152850192, + "objective/train/tokens_used": 1511404000, + "objective/train/value_avg": -0.01221466064453125, + "objective/train/value_loss": 0.001457849401049316, + "objective/train/value_max": -0.0004992485046386719, + "objective/train/value_min": -0.300048828125, + "objective/train/value_reward_corr": 0.3134641002008175, + "objective/train/value_std": 0.01346588134765625, + "objective/train/weight_avg": 1.0004671812057495, + "objective/train/weighted_lm_loss": 2.6314890384674072, + "objective/train/weights_max": 1.0163352489471436, + "objective/train/weights_min": 0.9360160231590271, + "theoretical_loss": 3.517958152850192, + "tokens_seen": 1490944000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005537720706260032, + "loss": 1.492, + "theoretical_loss": 3.5179308544169543, + "tokens_seen": 1491075072 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005534510433386838, + "loss": 1.4954, + "theoretical_loss": 3.5177125774690827, + "tokens_seen": 1492123648 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.007158373482525349, + "objective/train/docs_used": 846561, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5400681495666504, + "objective/train/original_loss": 2.5400683879852295, + "objective/train/theoretical_loss": 3.5176171430354897, + "objective/train/tokens_used": 1513042400, + "objective/train/value_avg": -0.01361083984375, + "objective/train/value_loss": 0.00035306898644194007, + "objective/train/value_max": -0.0004673004150390625, + "objective/train/value_min": -0.169189453125, + "objective/train/value_reward_corr": 0.29766096785627666, + "objective/train/value_std": 0.0138397216796875, + "objective/train/weight_avg": 1.0007176399230957, + "objective/train/weighted_lm_loss": 2.541486978530884, + "objective/train/weights_max": 1.0169020891189575, + "objective/train/weights_min": 0.9695871472358704, + "theoretical_loss": 3.5176171430354897, + "tokens_seen": 1492582400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005531300160513643, + "loss": 1.4912, + "theoretical_loss": 3.517494496774837, + "tokens_seen": 1493172224 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.005191238131374121, + "objective/train/docs_used": 848095, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9821932315826416, + "objective/train/original_loss": 2.9821932315826416, + "objective/train/theoretical_loss": 3.517276612020132, + "objective/train/tokens_used": 1514680800, + "objective/train/value_avg": -0.023284912109375, + "objective/train/value_loss": 0.005021814722567797, + "objective/train/value_max": -0.0004546642303466797, + "objective/train/value_min": -0.72265625, + "objective/train/value_reward_corr": 0.45108527329967774, + "objective/train/value_std": 0.03814697265625, + "objective/train/weight_avg": 1.0005439519882202, + "objective/train/weighted_lm_loss": 2.9835307598114014, + "objective/train/weights_max": 1.04088294506073, + "objective/train/weights_min": 0.927773654460907, + "theoretical_loss": 3.517276612020132, + "tokens_seen": 1494220800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005528089887640449, + "loss": 1.4832, + "theoretical_loss": 3.517276612020132, + "tokens_seen": 1494220800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005524879614767255, + "loss": 1.4963, + "theoretical_loss": 3.5170589228916054, + "tokens_seen": 1495269376 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 5.995551327941939e-05, + "objective/train/docs_used": 848766, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.797062635421753, + "objective/train/original_loss": 2.7970621585845947, + "objective/train/theoretical_loss": 3.5169365586079597, + "objective/train/tokens_used": 1516319200, + "objective/train/value_avg": -0.024017333984375, + "objective/train/value_loss": 0.005689500831067562, + "objective/train/value_max": -0.0004029273986816406, + "objective/train/value_min": -0.62109375, + "objective/train/value_reward_corr": 0.48267804063382747, + "objective/train/value_std": 0.034149169921875, + "objective/train/weight_avg": 1.000033974647522, + "objective/train/weighted_lm_loss": 2.795835018157959, + "objective/train/weights_max": 1.063070297241211, + "objective/train/weights_min": 0.9244911074638367, + "theoretical_loss": 3.5169365586079597, + "tokens_seen": 1495859200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005521669341894062, + "loss": 1.4983, + "theoretical_loss": 3.516841429076615, + "tokens_seen": 1496317952 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005518459069020867, + "loss": 1.4553, + "theoretical_loss": 3.516624130263237, + "tokens_seen": 1497366528 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": -0.0006745829014107585, + "objective/train/docs_used": 849888, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.945876121520996, + "objective/train/original_loss": 2.945876121520996, + "objective/train/theoretical_loss": 3.5165969816071083, + "objective/train/tokens_used": 1517957600, + "objective/train/value_avg": -0.0190277099609375, + "objective/train/value_loss": 0.005857300013303757, + "objective/train/value_max": -0.0003459453582763672, + "objective/train/value_min": -0.9853515625, + "objective/train/value_reward_corr": 0.5770720142170066, + "objective/train/value_std": 0.040374755859375, + "objective/train/weight_avg": 0.9999611377716064, + "objective/train/weighted_lm_loss": 2.9454572200775146, + "objective/train/weights_max": 1.0538973808288574, + "objective/train/weights_min": 0.9081867337226868, + "theoretical_loss": 3.5165969816071083, + "tokens_seen": 1497497600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005515248796147673, + "loss": 1.4784, + "theoretical_loss": 3.5164070261402633, + "tokens_seen": 1498415104 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.008417567238211632, + "objective/train/docs_used": 850610, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.60779070854187, + "objective/train/original_loss": 2.60779070854187, + "objective/train/theoretical_loss": 3.516257879829986, + "objective/train/tokens_used": 1519596000, + "objective/train/value_avg": -0.03338623046875, + "objective/train/value_loss": 0.000974709982983768, + "objective/train/value_max": -0.0005526542663574219, + "objective/train/value_min": -0.9931640625, + "objective/train/value_reward_corr": 0.9681579565962719, + "objective/train/value_std": 0.11846923828125, + "objective/train/weight_avg": 1.0008466243743896, + "objective/train/weighted_lm_loss": 2.609534502029419, + "objective/train/weights_max": 1.0908018350601196, + "objective/train/weights_min": 0.9216371178627014, + "theoretical_loss": 3.516257879829986, + "tokens_seen": 1499136000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005512038523274479, + "loss": 1.4681, + "theoretical_loss": 3.516190116397201, + "tokens_seen": 1499463680 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005508828250401284, + "loss": 1.4907, + "theoretical_loss": 3.5159734007242682, + "tokens_seen": 1500512256 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.006196260917931795, + "objective/train/docs_used": 851359, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8351237773895264, + "objective/train/original_loss": 2.8351240158081055, + "objective/train/theoretical_loss": 3.5159192520932576, + "objective/train/tokens_used": 1521234400, + "objective/train/value_avg": -0.013885498046875, + "objective/train/value_loss": 0.0005821645027026534, + "objective/train/value_max": -0.0005211830139160156, + "objective/train/value_min": -0.5029296875, + "objective/train/value_reward_corr": 0.5199338436037885, + "objective/train/value_std": 0.0198974609375, + "objective/train/weight_avg": 1.0006225109100342, + "objective/train/weighted_lm_loss": 2.8370893001556396, + "objective/train/weights_max": 1.0472012758255005, + "objective/train/weights_min": 0.9807873368263245, + "theoretical_loss": 3.5159192520932576, + "tokens_seen": 1500774400 + }, + { + "epoch": 0.46, + "learning_rate": 0.000550561797752809, + "loss": 1.4661, + "theoretical_loss": 3.5157568788123923, + "tokens_seen": 1501560832 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": -0.0032071659807115793, + "objective/train/docs_used": 852603, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.995671033859253, + "objective/train/original_loss": 2.9956705570220947, + "objective/train/theoretical_loss": 3.5155810972178196, + "objective/train/tokens_used": 1522872800, + "objective/train/value_avg": -0.0191802978515625, + "objective/train/value_loss": 0.005999458953738213, + "objective/train/value_max": -0.0005726814270019531, + "objective/train/value_min": -0.94921875, + "objective/train/value_reward_corr": 0.6212264257218507, + "objective/train/value_std": 0.052703857421875, + "objective/train/weight_avg": 0.9997088313102722, + "objective/train/weighted_lm_loss": 2.994276762008667, + "objective/train/weights_max": 1.0818488597869873, + "objective/train/weights_min": 0.9119642376899719, + "theoretical_loss": 3.5155810972178196, + "tokens_seen": 1502412800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005502407704654896, + "loss": 1.5253, + "theoretical_loss": 3.515540550353209, + "tokens_seen": 1502609408 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005499197431781702, + "loss": 1.499, + "theoretical_loss": 3.5153244150390597, + "tokens_seen": 1503657984 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": -0.001815412542782724, + "objective/train/docs_used": 853123, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7970449924468994, + "objective/train/original_loss": 2.7970447540283203, + "objective/train/theoretical_loss": 3.515243414028785, + "objective/train/tokens_used": 1524511200, + "objective/train/value_avg": -0.01531219482421875, + "objective/train/value_loss": 0.001650263904593885, + "objective/train/value_max": -0.0006070137023925781, + "objective/train/value_min": -0.329345703125, + "objective/train/value_reward_corr": 0.6679617391892365, + "objective/train/value_std": 0.0187530517578125, + "objective/train/weight_avg": 0.9998266696929932, + "objective/train/weighted_lm_loss": 2.797264337539673, + "objective/train/weights_max": 1.0330803394317627, + "objective/train/weights_min": 0.9744116067886353, + "theoretical_loss": 3.515243414028785, + "tokens_seen": 1504051200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005495987158908507, + "loss": 1.5014, + "theoretical_loss": 3.5151084725629884, + "tokens_seen": 1504706560 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.006338681094348431, + "objective/train/docs_used": 854391, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7186386585235596, + "objective/train/original_loss": 2.7186391353607178, + "objective/train/theoretical_loss": 3.51490620135546, + "objective/train/tokens_used": 1526149600, + "objective/train/value_avg": -0.0184173583984375, + "objective/train/value_loss": 0.0018485155887901783, + "objective/train/value_max": -0.0005974769592285156, + "objective/train/value_min": -0.93798828125, + "objective/train/value_reward_corr": 0.5331097167513528, + "objective/train/value_std": 0.038116455078125, + "objective/train/weight_avg": 1.000643014907837, + "objective/train/weighted_lm_loss": 2.7206554412841797, + "objective/train/weights_max": 1.0654094219207764, + "objective/train/weights_min": 0.9435109496116638, + "theoretical_loss": 3.51490620135546, + "tokens_seen": 1505689600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005492776886035313, + "loss": 1.493, + "theoretical_loss": 3.5148927226187405, + "tokens_seen": 1505755136 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005489566613162119, + "loss": 1.497, + "theoretical_loss": 3.514677164900762, + "tokens_seen": 1506803712 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.014839021489024162, + "objective/train/docs_used": 855086, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.280258893966675, + "objective/train/original_loss": 3.280259609222412, + "objective/train/theoretical_loss": 3.5145694580313287, + "objective/train/tokens_used": 1527788000, + "objective/train/value_avg": -0.030853271484375, + "objective/train/value_loss": 0.0047378577291965485, + "objective/train/value_max": -0.0007042884826660156, + "objective/train/value_min": -0.84765625, + "objective/train/value_reward_corr": 0.5476509273529723, + "objective/train/value_std": 0.06500244140625, + "objective/train/weight_avg": 1.0015074014663696, + "objective/train/weighted_lm_loss": 3.285646438598633, + "objective/train/weights_max": 1.07375967502594, + "objective/train/weights_min": 0.9184357523918152, + "theoretical_loss": 3.5145694580313287, + "tokens_seen": 1507328000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005486356340288925, + "loss": 1.472, + "theoretical_loss": 3.514461799104195, + "tokens_seen": 1507852288 + }, + { + "epoch": 0.46, + "learning_rate": 0.000548314606741573, + "loss": 1.5049, + "theoretical_loss": 3.5142466249248754, + "tokens_seen": 1508900864 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.016272790729999542, + "objective/train/docs_used": 856541, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.668013095855713, + "objective/train/original_loss": 2.668013095855713, + "objective/train/theoretical_loss": 3.5142331828940288, + "objective/train/tokens_used": 1529426400, + "objective/train/value_avg": -0.0213775634765625, + "objective/train/value_loss": 0.0013327361084520817, + "objective/train/value_max": -0.0006537437438964844, + "objective/train/value_min": -0.46875, + "objective/train/value_reward_corr": 0.3651327068264872, + "objective/train/value_std": 0.033355712890625, + "objective/train/weight_avg": 1.0016340017318726, + "objective/train/weighted_lm_loss": 2.6716580390930176, + "objective/train/weights_max": 1.0473402738571167, + "objective/train/weights_min": 0.9607258439064026, + "theoretical_loss": 3.5142331828940288, + "tokens_seen": 1508966400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005479935794542537, + "loss": 1.5006, + "theoretical_loss": 3.5140316420593347, + "tokens_seen": 1509949440 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.005736599210649729, + "objective/train/docs_used": 857299, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.895097494125366, + "objective/train/original_loss": 2.895097255706787, + "objective/train/theoretical_loss": 3.5138973747853353, + "objective/train/tokens_used": 1531064800, + "objective/train/value_avg": -0.01448822021484375, + "objective/train/value_loss": 0.002038579899817705, + "objective/train/value_max": -0.00036406517028808594, + "objective/train/value_min": -0.348876953125, + "objective/train/value_reward_corr": 0.40778191115079454, + "objective/train/value_std": 0.022430419921875, + "objective/train/weight_avg": 1.0005837678909302, + "objective/train/weighted_lm_loss": 2.8961708545684814, + "objective/train/weights_max": 1.0281965732574463, + "objective/train/weights_min": 0.9308682084083557, + "theoretical_loss": 3.5138973747853353, + "tokens_seen": 1510604800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005476725521669343, + "loss": 1.4909, + "theoretical_loss": 3.513816850204793, + "tokens_seen": 1510998016 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005473515248796148, + "loss": 1.4542, + "theoretical_loss": 3.5136022490591605, + "tokens_seen": 1512046592 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": -0.007893734611570835, + "objective/train/docs_used": 858636, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7606306076049805, + "objective/train/original_loss": 2.7606308460235596, + "objective/train/theoretical_loss": 3.513562032551141, + "objective/train/tokens_used": 1532703200, + "objective/train/value_avg": -0.01309967041015625, + "objective/train/value_loss": 0.00467052822932601, + "objective/train/value_max": -0.0005235671997070312, + "objective/train/value_min": -0.5849609375, + "objective/train/value_reward_corr": 0.4818208240765862, + "objective/train/value_std": 0.02178955078125, + "objective/train/weight_avg": 0.9992336630821228, + "objective/train/weighted_lm_loss": 2.7609975337982178, + "objective/train/weights_max": 1.0597727298736572, + "objective/train/weights_min": 0.9172792434692383, + "theoretical_loss": 3.513562032551141, + "tokens_seen": 1512243200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005470304975922954, + "loss": 1.471, + "theoretical_loss": 3.5133878383210337, + "tokens_seen": 1513095168 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0023712110705673695, + "objective/train/docs_used": 859135, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1785707473754883, + "objective/train/original_loss": 3.178570032119751, + "objective/train/theoretical_loss": 3.513227155041438, + "objective/train/tokens_used": 1534341600, + "objective/train/value_avg": -0.022369384765625, + "objective/train/value_loss": 0.008525801822543144, + "objective/train/value_max": -0.00046563148498535156, + "objective/train/value_min": -0.9794921875, + "objective/train/value_reward_corr": 0.3264065960570431, + "objective/train/value_std": 0.05169677734375, + "objective/train/weight_avg": 1.000279188156128, + "objective/train/weighted_lm_loss": 3.178607940673828, + "objective/train/weights_max": 1.0988506078720093, + "objective/train/weights_min": 0.916259229183197, + "theoretical_loss": 3.513227155041438, + "tokens_seen": 1513881600 + }, + { + "epoch": 0.46, + "learning_rate": 0.000546709470304976, + "loss": 1.5081, + "theoretical_loss": 3.513173617689695, + "tokens_seen": 1514143744 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005463884430176566, + "loss": 1.4599, + "theoretical_loss": 3.512959586865108, + "tokens_seen": 1515192320 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0040972973220050335, + "objective/train/docs_used": 860397, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6402175426483154, + "objective/train/original_loss": 2.6402173042297363, + "objective/train/theoretical_loss": 3.5128927411102966, + "objective/train/tokens_used": 1535980000, + "objective/train/value_avg": -0.018707275390625, + "objective/train/value_loss": 0.004481840413063765, + "objective/train/value_max": -0.0007615089416503906, + "objective/train/value_min": -0.98388671875, + "objective/train/value_reward_corr": 0.540448984033994, + "objective/train/value_std": 0.04730224609375, + "objective/train/weight_avg": 1.000431776046753, + "objective/train/weighted_lm_loss": 2.6404998302459717, + "objective/train/weights_max": 1.063296914100647, + "objective/train/weights_min": 0.9077228307723999, + "theoretical_loss": 3.5128927411102966, + "tokens_seen": 1515520000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005460674157303371, + "loss": 1.5062, + "theoretical_loss": 3.512745745547918, + "tokens_seen": 1516240896 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": -0.006865189876407385, + "objective/train/docs_used": 860980, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.272125720977783, + "objective/train/original_loss": 3.2721259593963623, + "objective/train/theoretical_loss": 3.5125587896158477, + "objective/train/tokens_used": 1537618400, + "objective/train/value_avg": -0.028289794921875, + "objective/train/value_loss": 0.013507991097867489, + "objective/train/value_max": -0.0005421638488769531, + "objective/train/value_min": -0.994140625, + "objective/train/value_reward_corr": 0.6451303773654025, + "objective/train/value_std": 0.0836181640625, + "objective/train/weight_avg": 0.9993797540664673, + "objective/train/weighted_lm_loss": 3.2695655822753906, + "objective/train/weights_max": 1.0766900777816772, + "objective/train/weights_min": 0.9069420099258423, + "theoretical_loss": 3.5125587896158477, + "tokens_seen": 1517158400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005457463884430177, + "loss": 1.4875, + "theoretical_loss": 3.5125320934394484, + "tokens_seen": 1517289472 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005454253611556983, + "loss": 1.4837, + "theoretical_loss": 3.5123186302417007, + "tokens_seen": 1518338048 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0036279144696891308, + "objective/train/docs_used": 862373, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.3950796127319336, + "objective/train/original_loss": 2.3950798511505127, + "objective/train/theoretical_loss": 3.512225299420264, + "objective/train/tokens_used": 1539256800, + "objective/train/value_avg": -0.0201873779296875, + "objective/train/value_loss": 0.0045894295908510685, + "objective/train/value_max": -0.0005817413330078125, + "objective/train/value_min": -0.9931640625, + "objective/train/value_reward_corr": 0.6321896221374695, + "objective/train/value_std": 0.060699462890625, + "objective/train/weight_avg": 1.0003854036331177, + "objective/train/weighted_lm_loss": 2.39554500579834, + "objective/train/weights_max": 1.072894811630249, + "objective/train/weights_min": 0.9080727696418762, + "theoretical_loss": 3.512225299420264, + "tokens_seen": 1518796800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005451043338683788, + "loss": 1.4709, + "theoretical_loss": 3.51210535565735, + "tokens_seen": 1519386624 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": -0.0014164363965392113, + "objective/train/docs_used": 862945, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.149906873703003, + "objective/train/original_loss": 3.149907112121582, + "objective/train/theoretical_loss": 3.511892269389743, + "objective/train/tokens_used": 1540895200, + "objective/train/value_avg": -0.017730712890625, + "objective/train/value_loss": 0.005347291007637978, + "objective/train/value_max": -0.00034880638122558594, + "objective/train/value_min": -0.97998046875, + "objective/train/value_reward_corr": 0.4498308046214956, + "objective/train/value_std": 0.04254150390625, + "objective/train/weight_avg": 0.9998846650123596, + "objective/train/weighted_lm_loss": 3.1500449180603027, + "objective/train/weights_max": 1.082824945449829, + "objective/train/weights_min": 0.9089274406433105, + "theoretical_loss": 3.511892269389743, + "tokens_seen": 1520435200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005447833065810594, + "loss": 1.4745, + "theoretical_loss": 3.511892269389743, + "tokens_seen": 1520435200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005444622792937399, + "loss": 1.4362, + "theoretical_loss": 3.5116793711429004, + "tokens_seen": 1521483776 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0003034705005120486, + "objective/train/docs_used": 863367, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.829059600830078, + "objective/train/original_loss": 2.829059362411499, + "objective/train/theoretical_loss": 3.5115596983944855, + "objective/train/tokens_used": 1542533600, + "objective/train/value_avg": -0.029815673828125, + "objective/train/value_loss": 0.008518144488334656, + "objective/train/value_max": -0.00033664703369140625, + "objective/train/value_min": -0.9921875, + "objective/train/value_reward_corr": 0.5921221998007143, + "objective/train/value_std": 0.085205078125, + "objective/train/weight_avg": 1.000072717666626, + "objective/train/weighted_lm_loss": 2.828939914703369, + "objective/train/weights_max": 1.096489667892456, + "objective/train/weights_min": 0.9067347645759583, + "theoretical_loss": 3.5115596983944855, + "tokens_seen": 1522073600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005441412520064206, + "loss": 1.5006, + "theoretical_loss": 3.511466660621508, + "tokens_seen": 1522532352 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005438202247191011, + "loss": 1.4776, + "theoretical_loss": 3.5112541375309214, + "tokens_seen": 1523580928 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.002923002000898123, + "objective/train/docs_used": 864510, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8404910564422607, + "objective/train/original_loss": 2.84049129486084, + "objective/train/theoretical_loss": 3.511227585308678, + "objective/train/tokens_used": 1544172000, + "objective/train/value_avg": -0.01242828369140625, + "objective/train/value_loss": 0.0013826539507135749, + "objective/train/value_max": -0.0003597736358642578, + "objective/train/value_min": -0.52734375, + "objective/train/value_reward_corr": 0.2013525767504497, + "objective/train/value_std": 0.012664794921875, + "objective/train/weight_avg": 1.000299096107483, + "objective/train/weighted_lm_loss": 2.841383934020996, + "objective/train/weights_max": 1.0490620136260986, + "objective/train/weights_min": 0.9178963899612427, + "theoretical_loss": 3.511227585308678, + "tokens_seen": 1523712000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005434991974317817, + "loss": 1.4854, + "theoretical_loss": 3.511041801577159, + "tokens_seen": 1524629504 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0007218934479169548, + "objective/train/docs_used": 865194, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0703041553497314, + "objective/train/original_loss": 3.070303201675415, + "objective/train/theoretical_loss": 3.5108959290104753, + "objective/train/tokens_used": 1545810400, + "objective/train/value_avg": -0.0176849365234375, + "objective/train/value_loss": 0.0041197920218110085, + "objective/train/value_max": -0.0003654956817626953, + "objective/train/value_min": -0.3251953125, + "objective/train/value_reward_corr": 0.4305886306018514, + "objective/train/value_std": 0.028289794921875, + "objective/train/weight_avg": 1.0000925064086914, + "objective/train/weighted_lm_loss": 3.0708189010620117, + "objective/train/weights_max": 1.0258351564407349, + "objective/train/weights_min": 0.9300140738487244, + "theoretical_loss": 3.5108959290104753, + "tokens_seen": 1525350400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005431781701444623, + "loss": 1.4823, + "theoretical_loss": 3.510829652466904, + "tokens_seen": 1525678080 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005428571428571428, + "loss": 1.4433, + "theoretical_loss": 3.5106176899074972, + "tokens_seen": 1526726656 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.005543163511902094, + "objective/train/docs_used": 866480, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6502089500427246, + "objective/train/original_loss": 2.6502091884613037, + "objective/train/theoretical_loss": 3.510564728381983, + "objective/train/tokens_used": 1547448800, + "objective/train/value_avg": -0.0154266357421875, + "objective/train/value_loss": 0.0034996189642697573, + "objective/train/value_max": -0.00042891502380371094, + "objective/train/value_min": -0.8037109375, + "objective/train/value_reward_corr": 0.3132333698459507, + "objective/train/value_std": 0.025115966796875, + "objective/train/weight_avg": 1.000571608543396, + "objective/train/weighted_lm_loss": 2.651353597640991, + "objective/train/weights_max": 1.0403190851211548, + "objective/train/weights_min": 0.9182108640670776, + "theoretical_loss": 3.510564728381983, + "tokens_seen": 1526988800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005425361155698234, + "loss": 1.4477, + "theoretical_loss": 3.510405913606943, + "tokens_seen": 1527775232 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.00951419584453106, + "objective/train/docs_used": 867112, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.035895824432373, + "objective/train/original_loss": 3.035895586013794, + "objective/train/theoretical_loss": 3.510233982309237, + "objective/train/tokens_used": 1549087200, + "objective/train/value_avg": -0.0198822021484375, + "objective/train/value_loss": 0.0011171087389811873, + "objective/train/value_max": -0.0007181167602539062, + "objective/train/value_min": -0.60205078125, + "objective/train/value_reward_corr": 0.3769739778643463, + "objective/train/value_std": 0.0251617431640625, + "objective/train/weight_avg": 1.0009570121765137, + "objective/train/weighted_lm_loss": 3.0387682914733887, + "objective/train/weights_max": 1.0576070547103882, + "objective/train/weights_min": 0.9330268502235413, + "theoretical_loss": 3.510233982309237, + "tokens_seen": 1528627200 + }, + { + "epoch": 0.46, + "learning_rate": 0.000542215088282504, + "loss": 1.4588, + "theoretical_loss": 3.510194323273899, + "tokens_seen": 1528823808 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005418940609951846, + "loss": 1.4509, + "theoretical_loss": 3.5099829186176796, + "tokens_seen": 1529872384 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": -0.0025102586951106787, + "objective/train/docs_used": 868323, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7123234272003174, + "objective/train/original_loss": 2.7123234272003174, + "objective/train/theoretical_loss": 3.5099036896821874, + "objective/train/tokens_used": 1550725600, + "objective/train/value_avg": -0.0208740234375, + "objective/train/value_loss": 0.008875284343957901, + "objective/train/value_max": -0.0006189346313476562, + "objective/train/value_min": -0.9599609375, + "objective/train/value_reward_corr": 0.5293783673635226, + "objective/train/value_std": 0.0523681640625, + "objective/train/weight_avg": 0.9997925162315369, + "objective/train/weighted_lm_loss": 2.7106611728668213, + "objective/train/weights_max": 1.0675808191299438, + "objective/train/weights_min": 0.9085284471511841, + "theoretical_loss": 3.5099036896821874, + "tokens_seen": 1530265600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005415730337078651, + "loss": 1.4293, + "theoretical_loss": 3.509771699348253, + "tokens_seen": 1530920960 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.003151576966047287, + "objective/train/docs_used": 869121, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0608627796173096, + "objective/train/original_loss": 3.0608632564544678, + "objective/train/theoretical_loss": 3.5095738493946786, + "objective/train/tokens_used": 1552364000, + "objective/train/value_avg": -0.00980377197265625, + "objective/train/value_loss": 0.00045792938908562064, + "objective/train/value_max": -0.0002491474151611328, + "objective/train/value_min": -0.261962890625, + "objective/train/value_reward_corr": 0.260585613183321, + "objective/train/value_std": 0.0166015625, + "objective/train/weight_avg": 1.0003174543380737, + "objective/train/weighted_lm_loss": 3.061413288116455, + "objective/train/weights_max": 1.023884654045105, + "objective/train/weights_min": 0.9911699295043945, + "theoretical_loss": 3.5095738493946786, + "tokens_seen": 1531904000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005412520064205457, + "loss": 1.48, + "theoretical_loss": 3.5095606651762368, + "tokens_seen": 1531969536 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005409309791332263, + "loss": 1.4943, + "theoretical_loss": 3.5093498158128997, + "tokens_seen": 1533018112 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0047137984074652195, + "objective/train/docs_used": 869825, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.920365810394287, + "objective/train/original_loss": 2.920365810394287, + "objective/train/theoretical_loss": 3.5092444603444344, + "objective/train/tokens_used": 1554002400, + "objective/train/value_avg": -0.018310546875, + "objective/train/value_loss": 0.0019777112174779177, + "objective/train/value_max": -0.0004239082336425781, + "objective/train/value_min": -0.350341796875, + "objective/train/value_reward_corr": 0.5630217153701342, + "objective/train/value_std": 0.034881591796875, + "objective/train/weight_avg": 1.0004812479019165, + "objective/train/weighted_lm_loss": 2.921506881713867, + "objective/train/weights_max": 1.027953863143921, + "objective/train/weights_min": 0.9220966696739197, + "theoretical_loss": 3.5092444603444344, + "tokens_seen": 1533542400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005406099518459068, + "loss": 1.4747, + "theoretical_loss": 3.509139150970157, + "tokens_seen": 1534066688 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005402889245585874, + "loss": 1.4449, + "theoretical_loss": 3.5089286703605698, + "tokens_seen": 1535115264 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.004598269704729319, + "objective/train/docs_used": 871111, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.148249864578247, + "objective/train/original_loss": 3.148249626159668, + "objective/train/theoretical_loss": 3.508915521433037, + "objective/train/tokens_used": 1555640800, + "objective/train/value_avg": -0.0099639892578125, + "objective/train/value_loss": 0.0007063549128361046, + "objective/train/value_max": -0.0005483627319335938, + "objective/train/value_min": -0.235595703125, + "objective/train/value_reward_corr": 0.15108573008534126, + "objective/train/value_std": 0.00931549072265625, + "objective/train/weight_avg": 1.0004632472991943, + "objective/train/weighted_lm_loss": 3.1497180461883545, + "objective/train/weights_max": 1.0236060619354248, + "objective/train/weights_min": 0.9328836798667908, + "theoretical_loss": 3.508915521433037, + "tokens_seen": 1535180800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005399678972712681, + "loss": 1.4642, + "theoretical_loss": 3.5087183736973437, + "tokens_seen": 1536163840 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0101883290335536, + "objective/train/docs_used": 871860, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0251824855804443, + "objective/train/original_loss": 3.0251824855804443, + "objective/train/theoretical_loss": 3.5085870315659133, + "objective/train/tokens_used": 1557279200, + "objective/train/value_avg": -0.0186920166015625, + "objective/train/value_loss": 0.00078724161721766, + "objective/train/value_max": -0.0008072853088378906, + "objective/train/value_min": -0.521484375, + "objective/train/value_reward_corr": 0.5149031391296723, + "objective/train/value_std": 0.0273895263671875, + "objective/train/weight_avg": 1.0010226964950562, + "objective/train/weighted_lm_loss": 3.0289011001586914, + "objective/train/weights_max": 1.0422497987747192, + "objective/train/weights_min": 0.9779797196388245, + "theoretical_loss": 3.5085870315659133, + "tokens_seen": 1536819200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005396468699839487, + "loss": 1.4675, + "theoretical_loss": 3.5085082606943243, + "tokens_seen": 1537212416 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005393258426966292, + "loss": 1.4691, + "theoretical_loss": 3.508298331065999, + "tokens_seen": 1538260992 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0005490907351486385, + "objective/train/docs_used": 873345, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.156883955001831, + "objective/train/original_loss": 3.156883716583252, + "objective/train/theoretical_loss": 3.508258989652313, + "objective/train/tokens_used": 1558917600, + "objective/train/value_avg": -0.01287841796875, + "objective/train/value_loss": 0.002492834348231554, + "objective/train/value_max": -0.00041413307189941406, + "objective/train/value_min": -0.181640625, + "objective/train/value_reward_corr": 0.3474064297943737, + "objective/train/value_std": 0.01654052734375, + "objective/train/weight_avg": 1.000067114830017, + "objective/train/weighted_lm_loss": 3.1579084396362305, + "objective/train/weights_max": 1.0164530277252197, + "objective/train/weights_min": 0.9241511225700378, + "theoretical_loss": 3.508258989652313, + "tokens_seen": 1538457600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005390048154093098, + "loss": 1.5153, + "theoretical_loss": 3.508088584527492, + "tokens_seen": 1539309568 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.00740701612085104, + "objective/train/docs_used": 874032, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.676724910736084, + "objective/train/original_loss": 3.676724672317505, + "objective/train/theoretical_loss": 3.507931394605294, + "objective/train/tokens_used": 1560556000, + "objective/train/value_avg": -0.01158905029296875, + "objective/train/value_loss": 0.0007052822038531303, + "objective/train/value_max": -0.0003077983856201172, + "objective/train/value_min": -0.337158203125, + "objective/train/value_reward_corr": 0.23896949520847982, + "objective/train/value_std": 0.01715087890625, + "objective/train/weight_avg": 1.0007442235946655, + "objective/train/weighted_lm_loss": 3.679673194885254, + "objective/train/weights_max": 1.0307743549346924, + "objective/train/weights_min": 0.9542831778526306, + "theoretical_loss": 3.507931394605294, + "tokens_seen": 1540096000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005386837881219904, + "loss": 1.4394, + "theoretical_loss": 3.5078790207945647, + "tokens_seen": 1540358144 + }, + { + "epoch": 0.47, + "learning_rate": 0.000538362760834671, + "loss": 1.4249, + "theoretical_loss": 3.507669639583612, + "tokens_seen": 1541406720 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0038361414335668087, + "objective/train/docs_used": 874753, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.017888069152832, + "objective/train/original_loss": 3.017888069152832, + "objective/train/theoretical_loss": 3.5076042453417045, + "objective/train/tokens_used": 1562194400, + "objective/train/value_avg": -0.01023101806640625, + "objective/train/value_loss": 0.0010330973891541362, + "objective/train/value_max": -0.0005884170532226562, + "objective/train/value_min": -0.192138671875, + "objective/train/value_reward_corr": 0.12422095349583527, + "objective/train/value_std": 0.01067352294921875, + "objective/train/weight_avg": 1.000388741493225, + "objective/train/weighted_lm_loss": 3.01924204826355, + "objective/train/weights_max": 1.018243432044983, + "objective/train/weights_min": 0.9608317017555237, + "theoretical_loss": 3.5076042453417045, + "tokens_seen": 1541734400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005380417335473515, + "loss": 1.4708, + "theoretical_loss": 3.5074604406116627, + "tokens_seen": 1542455296 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.002057997975498438, + "objective/train/docs_used": 875990, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2890162467956543, + "objective/train/original_loss": 3.2890162467956543, + "objective/train/theoretical_loss": 3.507277540782165, + "objective/train/tokens_used": 1563832800, + "objective/train/value_avg": -0.015869140625, + "objective/train/value_loss": 0.004703517071902752, + "objective/train/value_max": -0.00043392181396484375, + "objective/train/value_min": -0.93798828125, + "objective/train/value_reward_corr": 0.7442861462341388, + "objective/train/value_std": 0.050537109375, + "objective/train/weight_avg": 1.000228762626648, + "objective/train/weighted_lm_loss": 3.2884538173675537, + "objective/train/weights_max": 1.063023328781128, + "objective/train/weights_min": 0.9125609397888184, + "theoretical_loss": 3.507277540782165, + "tokens_seen": 1543372800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005377207062600321, + "loss": 1.4879, + "theoretical_loss": 3.507251423596374, + "tokens_seen": 1543503872 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005373996789727127, + "loss": 1.4847, + "theoretical_loss": 3.5070425882560343, + "tokens_seen": 1544552448 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": -0.000380720040993765, + "objective/train/docs_used": 876526, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6956872940063477, + "objective/train/original_loss": 2.6956868171691895, + "objective/train/theoretical_loss": 3.5069512798510534, + "objective/train/tokens_used": 1565471200, + "objective/train/value_avg": -0.018218994140625, + "objective/train/value_loss": 0.005699073430150747, + "objective/train/value_max": -0.00027370452880859375, + "objective/train/value_min": -0.6650390625, + "objective/train/value_reward_corr": 0.531662451293742, + "objective/train/value_std": 0.0318603515625, + "objective/train/weight_avg": 0.9999898076057434, + "objective/train/weighted_lm_loss": 2.6957600116729736, + "objective/train/weights_max": 1.0211271047592163, + "objective/train/weights_min": 0.910030722618103, + "theoretical_loss": 3.5069512798510534, + "tokens_seen": 1545011200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005370786516853932, + "loss": 1.4645, + "theoretical_loss": 3.506833934309558, + "tokens_seen": 1545601024 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.00347149302251637, + "objective/train/docs_used": 877732, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9619204998016357, + "objective/train/original_loss": 2.9619204998016357, + "objective/train/theoretical_loss": 3.5066254614764842, + "objective/train/tokens_used": 1567109600, + "objective/train/value_avg": -0.01361846923828125, + "objective/train/value_loss": 0.0013046113308519125, + "objective/train/value_max": -0.0006313323974609375, + "objective/train/value_min": -0.263671875, + "objective/train/value_reward_corr": 0.3568249415230711, + "objective/train/value_std": 0.015777587890625, + "objective/train/weight_avg": 1.0003535747528076, + "objective/train/weighted_lm_loss": 2.9625048637390137, + "objective/train/weights_max": 1.022027850151062, + "objective/train/weights_min": 0.9640822410583496, + "theoretical_loss": 3.5066254614764842, + "tokens_seen": 1546649600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005367576243980738, + "loss": 1.4584, + "theoretical_loss": 3.5066254614764842, + "tokens_seen": 1546649600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005364365971107544, + "loss": 1.4812, + "theoretical_loss": 3.5064171694769763, + "tokens_seen": 1547698176 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0011501213302835822, + "objective/train/docs_used": 878530, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.878906488418579, + "objective/train/original_loss": 2.878906011581421, + "objective/train/theoretical_loss": 3.506300084590295, + "objective/train/tokens_used": 1568748000, + "objective/train/value_avg": -0.02764892578125, + "objective/train/value_loss": 0.003482931526377797, + "objective/train/value_max": -0.0004582405090332031, + "objective/train/value_min": -0.9912109375, + "objective/train/value_reward_corr": 0.8471500584674378, + "objective/train/value_std": 0.0887451171875, + "objective/train/weight_avg": 1.0001322031021118, + "objective/train/weighted_lm_loss": 2.879035234451294, + "objective/train/weights_max": 1.0476287603378296, + "objective/train/weights_min": 0.9152440428733826, + "theoretical_loss": 3.506300084590295, + "tokens_seen": 1548288000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005361155698234351, + "loss": 1.4962, + "theoretical_loss": 3.5062090580318186, + "tokens_seen": 1548746752 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005357945425361156, + "loss": 1.4683, + "theoretical_loss": 3.506001126862416, + "tokens_seen": 1549795328 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": -0.002680467441678047, + "objective/train/docs_used": 879841, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.89603853225708, + "objective/train/original_loss": 2.896038293838501, + "objective/train/theoretical_loss": 3.5059751481280284, + "objective/train/tokens_used": 1570386400, + "objective/train/value_avg": -0.0179595947265625, + "objective/train/value_loss": 0.002243633382022381, + "objective/train/value_max": -0.0002715587615966797, + "objective/train/value_min": -0.73583984375, + "objective/train/value_reward_corr": 0.6549644166390354, + "objective/train/value_std": 0.030975341796875, + "objective/train/weight_avg": 0.9997431039810181, + "objective/train/weighted_lm_loss": 2.8960087299346924, + "objective/train/weights_max": 1.0352933406829834, + "objective/train/weights_min": 0.920462429523468, + "theoretical_loss": 3.5059751481280284, + "tokens_seen": 1549926400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005354735152487962, + "loss": 1.4799, + "theoretical_loss": 3.505793375690791, + "tokens_seen": 1550843904 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": -0.0012836181558668613, + "objective/train/docs_used": 880523, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3326878547668457, + "objective/train/original_loss": 3.3326878547668457, + "objective/train/theoretical_loss": 3.5056506510289154, + "objective/train/tokens_used": 1572024800, + "objective/train/value_avg": -0.0230255126953125, + "objective/train/value_loss": 0.0050593772903084755, + "objective/train/value_max": -0.00025916099548339844, + "objective/train/value_min": -0.8681640625, + "objective/train/value_reward_corr": 0.6783774078337296, + "objective/train/value_std": 0.056549072265625, + "objective/train/weight_avg": 0.9998965263366699, + "objective/train/weighted_lm_loss": 3.3319709300994873, + "objective/train/weights_max": 1.0365372896194458, + "objective/train/weights_min": 0.9101630449295044, + "theoretical_loss": 3.5056506510289154, + "tokens_seen": 1551564800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005351524879614768, + "loss": 1.5027, + "theoretical_loss": 3.5055858042395815, + "tokens_seen": 1551892480 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005348314606741573, + "loss": 1.4727, + "theoretical_loss": 3.5053784122320417, + "tokens_seen": 1552941056 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.006784412078559399, + "objective/train/docs_used": 881131, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5220274925231934, + "objective/train/original_loss": 3.5220277309417725, + "objective/train/theoretical_loss": 3.505326592235857, + "objective/train/tokens_used": 1573663200, + "objective/train/value_avg": -0.01351165771484375, + "objective/train/value_loss": 0.0020451624877750874, + "objective/train/value_max": -0.0005550384521484375, + "objective/train/value_min": -0.8115234375, + "objective/train/value_reward_corr": 0.5300153107796004, + "objective/train/value_std": 0.0222320556640625, + "objective/train/weight_avg": 1.0006885528564453, + "objective/train/weighted_lm_loss": 3.524695634841919, + "objective/train/weights_max": 1.0461548566818237, + "objective/train/weights_min": 0.9150742888450623, + "theoretical_loss": 3.505326592235857, + "tokens_seen": 1553203200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005345104333868379, + "loss": 1.4634, + "theoretical_loss": 3.505171199392036, + "tokens_seen": 1553989632 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0025803863536566496, + "objective/train/docs_used": 882511, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2660765647888184, + "objective/train/original_loss": 3.2660768032073975, + "objective/train/theoretical_loss": 3.5050029706954113, + "objective/train/tokens_used": 1575301600, + "objective/train/value_avg": -0.0207366943359375, + "objective/train/value_loss": 0.00383814238011837, + "objective/train/value_max": -0.0003077983856201172, + "objective/train/value_min": -0.91796875, + "objective/train/value_reward_corr": 0.6709912154241202, + "objective/train/value_std": 0.055694580078125, + "objective/train/weight_avg": 1.0002769231796265, + "objective/train/weighted_lm_loss": 3.2667505741119385, + "objective/train/weights_max": 1.0572092533111572, + "objective/train/weights_min": 0.9085005521774292, + "theoretical_loss": 3.5050029706954113, + "tokens_seen": 1554841600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005341894060995185, + "loss": 1.4771, + "theoretical_loss": 3.504964165444042, + "tokens_seen": 1555038208 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005338683788121991, + "loss": 1.4737, + "theoretical_loss": 3.504757310113145, + "tokens_seen": 1556086784 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.007640957832336426, + "objective/train/docs_used": 882978, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.168442487716675, + "objective/train/original_loss": 3.1684420108795166, + "objective/train/theoretical_loss": 3.504679785357773, + "objective/train/tokens_used": 1576940000, + "objective/train/value_avg": -0.0192108154296875, + "objective/train/value_loss": 0.0023604463785886765, + "objective/train/value_max": -0.0004601478576660156, + "objective/train/value_min": -0.39501953125, + "objective/train/value_reward_corr": 0.5898899305953479, + "objective/train/value_std": 0.04052734375, + "objective/train/weight_avg": 1.000775694847107, + "objective/train/weighted_lm_loss": 3.17099928855896, + "objective/train/weights_max": 1.0331066846847534, + "objective/train/weights_min": 0.9331196546554565, + "theoretical_loss": 3.504679785357773, + "tokens_seen": 1556480000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005335473515248796, + "loss": 1.4528, + "theoretical_loss": 3.5045506331250382, + "tokens_seen": 1557135360 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": -4.949166395817883e-06, + "objective/train/docs_used": 884109, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.857518196105957, + "objective/train/original_loss": 2.857517957687378, + "objective/train/theoretical_loss": 3.5043570351767617, + "objective/train/tokens_used": 1578578400, + "objective/train/value_avg": -0.034027099609375, + "objective/train/value_loss": 0.0033157148864120245, + "objective/train/value_max": -0.0005931854248046875, + "objective/train/value_min": -0.81396484375, + "objective/train/value_reward_corr": 0.8343764098273041, + "objective/train/value_std": 0.07550048828125, + "objective/train/weight_avg": 1.0000159740447998, + "objective/train/weighted_lm_loss": 2.858224868774414, + "objective/train/weights_max": 1.041373610496521, + "objective/train/weights_min": 0.9357345104217529, + "theoretical_loss": 3.5043570351767617, + "tokens_seen": 1558118400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005332263242375602, + "loss": 1.4656, + "theoretical_loss": 3.50434413420602, + "tokens_seen": 1558183936 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005329052969502408, + "loss": 1.4947, + "theoretical_loss": 3.504137813082994, + "tokens_seen": 1559232512 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.009238502942025661, + "objective/train/docs_used": 884823, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9313948154449463, + "objective/train/original_loss": 2.9313950538635254, + "objective/train/theoretical_loss": 3.504034719109799, + "objective/train/tokens_used": 1580216800, + "objective/train/value_avg": -0.0153961181640625, + "objective/train/value_loss": 0.0020541097037494183, + "objective/train/value_max": -0.0003554821014404297, + "objective/train/value_min": -0.45263671875, + "objective/train/value_reward_corr": 0.5076060238793014, + "objective/train/value_std": 0.03607177734375, + "objective/train/weight_avg": 1.00093412399292, + "objective/train/weighted_lm_loss": 2.9336891174316406, + "objective/train/weights_max": 1.0451135635375977, + "objective/train/weights_min": 0.9314185976982117, + "theoretical_loss": 3.504034719109799, + "tokens_seen": 1559756800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005325842696629213, + "loss": 1.4945, + "theoretical_loss": 3.5039316694834635, + "tokens_seen": 1560281088 + }, + { + "epoch": 0.47, + "learning_rate": 0.000532263242375602, + "loss": 1.4838, + "theoretical_loss": 3.5037257031355344, + "tokens_seen": 1561329664 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0014369406271725893, + "objective/train/docs_used": 885915, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.988982677459717, + "objective/train/original_loss": 2.988982915878296, + "objective/train/theoretical_loss": 3.5037128361178995, + "objective/train/tokens_used": 1581855200, + "objective/train/value_avg": -0.01468658447265625, + "objective/train/value_loss": 0.002069955924525857, + "objective/train/value_max": -0.0003418922424316406, + "objective/train/value_min": -0.93701171875, + "objective/train/value_reward_corr": 0.28211406514223264, + "objective/train/value_std": 0.033721923828125, + "objective/train/weight_avg": 1.0001540184020996, + "objective/train/weighted_lm_loss": 2.9901633262634277, + "objective/train/weights_max": 1.0794172286987305, + "objective/train/weights_min": 0.9472257494926453, + "theoretical_loss": 3.5037128361178995, + "tokens_seen": 1561395200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005319422150882826, + "loss": 1.4873, + "theoretical_loss": 3.5035199137679105, + "tokens_seen": 1562378240 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.006988472770899534, + "objective/train/docs_used": 886454, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0570342540740967, + "objective/train/original_loss": 3.0570340156555176, + "objective/train/theoretical_loss": 3.50339138516565, + "objective/train/tokens_used": 1583493600, + "objective/train/value_avg": -0.01432037353515625, + "objective/train/value_loss": 0.0034890654496848583, + "objective/train/value_max": -0.0005421638488769531, + "objective/train/value_min": -0.9375, + "objective/train/value_reward_corr": 0.26800312796084474, + "objective/train/value_std": 0.0355224609375, + "objective/train/weight_avg": 1.000715970993042, + "objective/train/weighted_lm_loss": 3.0588202476501465, + "objective/train/weights_max": 1.0704506635665894, + "objective/train/weights_min": 0.9100242257118225, + "theoretical_loss": 3.50339138516565, + "tokens_seen": 1563033600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005316211878009632, + "loss": 1.4944, + "theoretical_loss": 3.503314301109892, + "tokens_seen": 1563426816 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005313001605136437, + "loss": 1.5018, + "theoretical_loss": 3.5031088648913755, + "tokens_seen": 1564475392 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 7.349669613176957e-05, + "objective/train/docs_used": 887471, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5765585899353027, + "objective/train/original_loss": 2.576559066772461, + "objective/train/theoretical_loss": 3.5030703652211947, + "objective/train/tokens_used": 1585132000, + "objective/train/value_avg": -0.0098419189453125, + "objective/train/value_loss": 0.0009382889256812632, + "objective/train/value_max": -0.00037860870361328125, + "objective/train/value_min": -0.1749267578125, + "objective/train/value_reward_corr": 0.11719257013468043, + "objective/train/value_std": 0.0091552734375, + "objective/train/weight_avg": 1.0000120401382446, + "objective/train/weighted_lm_loss": 2.577223777770996, + "objective/train/weights_max": 1.0173721313476562, + "objective/train/weights_min": 0.9742282032966614, + "theoretical_loss": 3.5030703652211947, + "tokens_seen": 1564672000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005309791332263243, + "loss": 1.5067, + "theoretical_loss": 3.5029036048428503, + "tokens_seen": 1565523968 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.003344169119372964, + "objective/train/docs_used": 888126, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.566437005996704, + "objective/train/original_loss": 2.566437244415283, + "objective/train/theoretical_loss": 3.5027497752562198, + "objective/train/tokens_used": 1586770400, + "objective/train/value_avg": -0.0204315185546875, + "objective/train/value_loss": 0.00464992132037878, + "objective/train/value_max": -0.0005483627319335938, + "objective/train/value_min": -0.916015625, + "objective/train/value_reward_corr": 0.5366865569397461, + "objective/train/value_std": 0.037109375, + "objective/train/weight_avg": 1.0003571510314941, + "objective/train/weighted_lm_loss": 2.5668702125549316, + "objective/train/weights_max": 1.0527673959732056, + "objective/train/weights_min": 0.9107993841171265, + "theoretical_loss": 3.5027497752562198, + "tokens_seen": 1566310400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005306581059390049, + "loss": 1.4943, + "theoretical_loss": 3.502698520695398, + "tokens_seen": 1566572544 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005303370786516855, + "loss": 1.5362, + "theoretical_loss": 3.5024936121806896, + "tokens_seen": 1567621120 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0022410040255635977, + "objective/train/docs_used": 889384, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.3822484016418457, + "objective/train/original_loss": 3.3822484016418457, + "objective/train/theoretical_loss": 3.502429614245937, + "objective/train/tokens_used": 1588408800, + "objective/train/value_avg": -0.032196044921875, + "objective/train/value_loss": 0.006178413983434439, + "objective/train/value_max": -0.0005316734313964844, + "objective/train/value_min": -0.76171875, + "objective/train/value_reward_corr": 0.652889244965163, + "objective/train/value_std": 0.05670166015625, + "objective/train/weight_avg": 1.0002546310424805, + "objective/train/weighted_lm_loss": 3.383061647415161, + "objective/train/weights_max": 1.0391374826431274, + "objective/train/weights_min": 0.9149351716041565, + "theoretical_loss": 3.502429614245937, + "tokens_seen": 1567948800 + }, + { + "epoch": 0.48, + "learning_rate": 0.000530016051364366, + "loss": 1.4982, + "theoretical_loss": 3.502288879030986, + "tokens_seen": 1568669696 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0008221061434596777, + "objective/train/docs_used": 890057, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.302333354949951, + "objective/train/original_loss": 3.302333116531372, + "objective/train/theoretical_loss": 3.5021098811690674, + "objective/train/tokens_used": 1590047200, + "objective/train/value_avg": -0.0212860107421875, + "objective/train/value_loss": 0.0042659202590584755, + "objective/train/value_max": -0.0005507469177246094, + "objective/train/value_min": -0.9033203125, + "objective/train/value_reward_corr": 0.556140764341134, + "objective/train/value_std": 0.042327880859375, + "objective/train/weight_avg": 1.000103235244751, + "objective/train/weighted_lm_loss": 3.3025991916656494, + "objective/train/weights_max": 1.0691401958465576, + "objective/train/weights_min": 0.9131553769111633, + "theoretical_loss": 3.5021098811690674, + "tokens_seen": 1569587200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005296950240770465, + "loss": 1.4907, + "theoretical_loss": 3.5020843209791326, + "tokens_seen": 1569718272 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005293739967897271, + "loss": 1.5092, + "theoretical_loss": 3.501879937758562, + "tokens_seen": 1570766848 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.010910311713814735, + "objective/train/docs_used": 891281, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0982069969177246, + "objective/train/original_loss": 3.0982072353363037, + "objective/train/theoretical_loss": 3.5017905750078278, + "objective/train/tokens_used": 1591685600, + "objective/train/value_avg": -0.04425048828125, + "objective/train/value_loss": 0.008815034292638302, + "objective/train/value_max": -0.0007734298706054688, + "objective/train/value_min": -0.93359375, + "objective/train/value_reward_corr": 0.6054608603726327, + "objective/train/value_std": 0.08966064453125, + "objective/train/weight_avg": 1.0011345148086548, + "objective/train/weighted_lm_loss": 3.100801467895508, + "objective/train/weights_max": 1.0625356435775757, + "objective/train/weights_min": 0.9071794152259827, + "theoretical_loss": 3.5017905750078278, + "tokens_seen": 1571225600 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005290529695024076, + "loss": 1.496, + "theoretical_loss": 3.5016757291032903, + "tokens_seen": 1571815424 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.004641312174499035, + "objective/train/docs_used": 891900, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0544679164886475, + "objective/train/original_loss": 3.0544676780700684, + "objective/train/theoretical_loss": 3.501471694747913, + "objective/train/tokens_used": 1593324000, + "objective/train/value_avg": -0.01499176025390625, + "objective/train/value_loss": 0.0018496594857424498, + "objective/train/value_max": -0.0005254745483398438, + "objective/train/value_min": -0.94921875, + "objective/train/value_reward_corr": 0.5198735343313906, + "objective/train/value_std": 0.0298004150390625, + "objective/train/weight_avg": 1.0004732608795166, + "objective/train/weighted_lm_loss": 3.0563957691192627, + "objective/train/weights_max": 1.0252693891525269, + "objective/train/weights_min": 0.9286385178565979, + "theoretical_loss": 3.501471694747913, + "tokens_seen": 1572864000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005287319422150882, + "loss": 1.5094, + "theoretical_loss": 3.501471694747913, + "tokens_seen": 1572864000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005284109149277688, + "loss": 1.5277, + "theoretical_loss": 3.5012678344276082, + "tokens_seen": 1573912576 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0055293673649430275, + "objective/train/docs_used": 893256, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.994413137435913, + "objective/train/original_loss": 2.994413137435913, + "objective/train/theoretical_loss": 3.5011532393784806, + "objective/train/tokens_used": 1594962400, + "objective/train/value_avg": -0.01031494140625, + "objective/train/value_loss": 0.0013958826893940568, + "objective/train/value_max": -0.0003905296325683594, + "objective/train/value_min": -0.54736328125, + "objective/train/value_reward_corr": 0.3070969440289535, + "objective/train/value_std": 0.013397216796875, + "objective/train/weight_avg": 1.0005598068237305, + "objective/train/weighted_lm_loss": 2.9956164360046387, + "objective/train/weights_max": 1.0203626155853271, + "objective/train/weights_min": 0.9104170203208923, + "theoretical_loss": 3.5011532393784806, + "tokens_seen": 1574502400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005280898876404495, + "loss": 1.4958, + "theoretical_loss": 3.5010641478781306, + "tokens_seen": 1574961152 + }, + { + "epoch": 0.48, + "learning_rate": 0.00052776886035313, + "loss": 1.5151, + "theoretical_loss": 3.5008606348358136, + "tokens_seen": 1576009728 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.004117940086871386, + "objective/train/docs_used": 893728, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9533536434173584, + "objective/train/original_loss": 2.9533536434173584, + "objective/train/theoretical_loss": 3.5008352078921368, + "objective/train/tokens_used": 1596600800, + "objective/train/value_avg": -0.016204833984375, + "objective/train/value_loss": 0.0011796477483585477, + "objective/train/value_max": -0.00034880638122558594, + "objective/train/value_min": -0.2666015625, + "objective/train/value_reward_corr": 0.6441413039792795, + "objective/train/value_std": 0.026092529296875, + "objective/train/weight_avg": 1.0004175901412964, + "objective/train/weighted_lm_loss": 2.9537861347198486, + "objective/train/weights_max": 1.020574927330017, + "objective/train/weights_min": 0.9201437830924988, + "theoretical_loss": 3.5008352078921368, + "tokens_seen": 1576140800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005274478330658106, + "loss": 1.5193, + "theoretical_loss": 3.5006572950375645, + "tokens_seen": 1577058304 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0055336239747703075, + "objective/train/docs_used": 894313, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.955779790878296, + "objective/train/original_loss": 2.955779552459717, + "objective/train/theoretical_loss": 3.50051759928492, + "objective/train/tokens_used": 1598239200, + "objective/train/value_avg": -0.0097808837890625, + "objective/train/value_loss": 0.0005526564782485366, + "objective/train/value_max": -0.00047469139099121094, + "objective/train/value_min": -0.216796875, + "objective/train/value_reward_corr": 0.18031233184695025, + "objective/train/value_std": 0.00873565673828125, + "objective/train/weight_avg": 1.0005561113357544, + "objective/train/weighted_lm_loss": 2.95737361907959, + "objective/train/weights_max": 1.0216350555419922, + "objective/train/weights_min": 0.9602119326591492, + "theoretical_loss": 3.50051759928492, + "tokens_seen": 1577779200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005271268057784912, + "loss": 1.5139, + "theoretical_loss": 3.5004541282208637, + "tokens_seen": 1578106880 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005268057784911717, + "loss": 1.5296, + "theoretical_loss": 3.500251134123765, + "tokens_seen": 1579155456 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.006138124968856573, + "objective/train/docs_used": 895615, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2437992095947266, + "objective/train/original_loss": 3.2437994480133057, + "objective/train/theoretical_loss": 3.5002004125562856, + "objective/train/tokens_used": 1599877600, + "objective/train/value_avg": -0.01244354248046875, + "objective/train/value_loss": 0.0021076530683785677, + "objective/train/value_max": -0.0005507469177246094, + "objective/train/value_min": -0.421142578125, + "objective/train/value_reward_corr": 0.2595658309176998, + "objective/train/value_std": 0.01776123046875, + "objective/train/weight_avg": 1.000624179840088, + "objective/train/weighted_lm_loss": 3.245492458343506, + "objective/train/weights_max": 1.0414074659347534, + "objective/train/weights_min": 0.9226412773132324, + "theoretical_loss": 3.5002004125562856, + "tokens_seen": 1579417600 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005264847512038523, + "loss": 1.513, + "theoretical_loss": 3.500048312484891, + "tokens_seen": 1580204032 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": -0.0038099754601716995, + "objective/train/docs_used": 896135, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1338491439819336, + "objective/train/original_loss": 3.1338493824005127, + "objective/train/theoretical_loss": 3.499883646709091, + "objective/train/tokens_used": 1601516000, + "objective/train/value_avg": -0.038116455078125, + "objective/train/value_loss": 0.01442552637308836, + "objective/train/value_max": -0.00032258033752441406, + "objective/train/value_min": -0.99072265625, + "objective/train/value_reward_corr": 0.6046681479464792, + "objective/train/value_std": 0.09539794921875, + "objective/train/weight_avg": 0.9996899962425232, + "objective/train/weighted_lm_loss": 3.13338041305542, + "objective/train/weights_max": 1.0917598009109497, + "objective/train/weights_min": 0.9062892198562622, + "theoretical_loss": 3.499883646709091, + "tokens_seen": 1581056000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005261637239165329, + "loss": 1.4791, + "theoretical_loss": 3.4998456630434336, + "tokens_seen": 1581252608 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005258426966292135, + "loss": 1.5322, + "theoretical_loss": 3.499643185539152, + "tokens_seen": 1582301184 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.007116061169654131, + "objective/train/docs_used": 897089, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.125375270843506, + "objective/train/original_loss": 3.125375270843506, + "objective/train/theoretical_loss": 3.4995673007495816, + "objective/train/tokens_used": 1603154400, + "objective/train/value_avg": -0.0210723876953125, + "objective/train/value_loss": 0.0045008584856987, + "objective/train/value_max": -0.00044417381286621094, + "objective/train/value_min": -0.9970703125, + "objective/train/value_reward_corr": 0.7874423932265274, + "objective/train/value_std": 0.1002197265625, + "objective/train/weight_avg": 1.0007343292236328, + "objective/train/weighted_lm_loss": 3.127993106842041, + "objective/train/weights_max": 1.0962175130844116, + "objective/train/weights_min": 0.9117807745933533, + "theoretical_loss": 3.4995673007495816, + "tokens_seen": 1582694400 + }, + { + "epoch": 0.48, + "learning_rate": 0.000525521669341894, + "loss": 1.5118, + "theoretical_loss": 3.4994408797123704, + "tokens_seen": 1583349760 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": -0.009569662623107433, + "objective/train/docs_used": 897735, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6994669437408447, + "objective/train/original_loss": 2.6994669437408447, + "objective/train/theoretical_loss": 3.499251373687373, + "objective/train/tokens_used": 1604792800, + "objective/train/value_avg": -0.019744873046875, + "objective/train/value_loss": 0.0034379828721284866, + "objective/train/value_max": -0.0004858970642089844, + "objective/train/value_min": -0.3662109375, + "objective/train/value_reward_corr": 0.5706021859383826, + "objective/train/value_std": 0.0310516357421875, + "objective/train/weight_avg": 0.9990601539611816, + "objective/train/weighted_lm_loss": 2.6955864429473877, + "objective/train/weights_max": 1.0278594493865967, + "objective/train/weights_min": 0.9365003108978271, + "theoretical_loss": 3.499251373687373, + "tokens_seen": 1584332800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005252006420545746, + "loss": 1.5181, + "theoretical_loss": 3.499238745303977, + "tokens_seen": 1584398336 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005248796147672552, + "loss": 1.5014, + "theoretical_loss": 3.4990367820554216, + "tokens_seen": 1585446912 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": -0.012119381688535213, + "objective/train/docs_used": 899130, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.106149673461914, + "objective/train/original_loss": 3.106149911880493, + "objective/train/theoretical_loss": 3.49893586453544, + "objective/train/tokens_used": 1606431200, + "objective/train/value_avg": -0.017059326171875, + "objective/train/value_loss": 0.007802721578627825, + "objective/train/value_max": -0.0003275871276855469, + "objective/train/value_min": -0.9873046875, + "objective/train/value_reward_corr": 0.7794829437422556, + "objective/train/value_std": 0.060577392578125, + "objective/train/weight_avg": 0.9988263249397278, + "objective/train/weighted_lm_loss": 3.1032536029815674, + "objective/train/weights_max": 1.0191195011138916, + "objective/train/weights_min": 0.9081811904907227, + "theoretical_loss": 3.49893586453544, + "tokens_seen": 1585971200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005245585874799357, + "loss": 1.5296, + "theoretical_loss": 3.498834989708716, + "tokens_seen": 1586495488 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005242375601926163, + "loss": 1.5135, + "theoretical_loss": 3.4986333680064297, + "tokens_seen": 1587544064 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.004592425189912319, + "objective/train/docs_used": 899656, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7114691734313965, + "objective/train/original_loss": 2.7114694118499756, + "objective/train/theoretical_loss": 3.4986207723100984, + "objective/train/tokens_used": 1608069600, + "objective/train/value_avg": -0.01448822021484375, + "objective/train/value_loss": 0.0023642396554350853, + "objective/train/value_max": -0.0005998611450195312, + "objective/train/value_min": -0.85986328125, + "objective/train/value_reward_corr": 0.4955188381796918, + "objective/train/value_std": 0.0240325927734375, + "objective/train/weight_avg": 1.0004708766937256, + "objective/train/weighted_lm_loss": 2.712486982345581, + "objective/train/weights_max": 1.0390827655792236, + "objective/train/weights_min": 0.9159053564071655, + "theoretical_loss": 3.4986207723100984, + "tokens_seen": 1587609600 + }, + { + "epoch": 0.48, + "learning_rate": 0.000523916532905297, + "loss": 1.5016, + "theoretical_loss": 3.4984319166916906, + "tokens_seen": 1588592640 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": -0.002283258130773902, + "objective/train/docs_used": 900745, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.5052030086517334, + "objective/train/original_loss": 3.505202531814575, + "objective/train/theoretical_loss": 3.4983060960309915, + "objective/train/tokens_used": 1609708000, + "objective/train/value_avg": -0.0291900634765625, + "objective/train/value_loss": 0.005965047050267458, + "objective/train/value_max": -0.00042724609375, + "objective/train/value_min": -0.68798828125, + "objective/train/value_reward_corr": 0.8123830331052124, + "objective/train/value_std": 0.06951904296875, + "objective/train/weight_avg": 0.9998011589050293, + "objective/train/weighted_lm_loss": 3.504201650619507, + "objective/train/weights_max": 1.0424466133117676, + "objective/train/weights_min": 0.9240009784698486, + "theoretical_loss": 3.4983060960309915, + "tokens_seen": 1589248000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005235955056179776, + "loss": 1.496, + "theoretical_loss": 3.4982306355081825, + "tokens_seen": 1589641216 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005232744783306581, + "loss": 1.4915, + "theoretical_loss": 3.4980295242001422, + "tokens_seen": 1590689792 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.011688748374581337, + "objective/train/docs_used": 902130, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.255178928375244, + "objective/train/original_loss": 3.255178213119507, + "objective/train/theoretical_loss": 3.497991834721076, + "objective/train/tokens_used": 1611346400, + "objective/train/value_avg": -0.025421142578125, + "objective/train/value_loss": 0.003299171570688486, + "objective/train/value_max": -0.0004093647003173828, + "objective/train/value_min": -0.912109375, + "objective/train/value_reward_corr": 0.48353856985611493, + "objective/train/value_std": 0.04840087890625, + "objective/train/weight_avg": 1.0011851787567139, + "objective/train/weighted_lm_loss": 3.2584331035614014, + "objective/train/weights_max": 1.0587221384048462, + "objective/train/weights_min": 0.9100530743598938, + "theoretical_loss": 3.497991834721076, + "tokens_seen": 1590886400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005229534510433387, + "loss": 1.5045, + "theoretical_loss": 3.497828582512361, + "tokens_seen": 1591738368 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.004507665988057852, + "objective/train/docs_used": 902721, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0685713291168213, + "objective/train/original_loss": 3.068570613861084, + "objective/train/theoretical_loss": 3.4976779874066066, + "objective/train/tokens_used": 1612984800, + "objective/train/value_avg": -0.01094818115234375, + "objective/train/value_loss": 0.0003159585758112371, + "objective/train/value_max": -0.0004012584686279297, + "objective/train/value_min": -0.2418212890625, + "objective/train/value_reward_corr": 0.5323328581777946, + "objective/train/value_std": 0.01318359375, + "objective/train/weight_avg": 1.0004523992538452, + "objective/train/weighted_lm_loss": 3.0700204372406006, + "objective/train/weights_max": 1.024080514907837, + "objective/train/weights_min": 0.9774289727210999, + "theoretical_loss": 3.4976779874066066, + "tokens_seen": 1592524800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005226324237560193, + "loss": 1.4806, + "theoretical_loss": 3.4976278101901803, + "tokens_seen": 1592786944 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005223113964686999, + "loss": 1.4799, + "theoretical_loss": 3.4974272069794914, + "tokens_seen": 1593835520 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0036283929366618395, + "objective/train/docs_used": 903500, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9916369915008545, + "objective/train/original_loss": 2.9916372299194336, + "objective/train/theoretical_loss": 3.4973645531171207, + "objective/train/tokens_used": 1614623200, + "objective/train/value_avg": -0.032958984375, + "objective/train/value_loss": 0.00824460294097662, + "objective/train/value_max": -0.0005526542663574219, + "objective/train/value_min": -0.9775390625, + "objective/train/value_reward_corr": 0.6817210512221009, + "objective/train/value_std": 0.0975341796875, + "objective/train/weight_avg": 1.000403642654419, + "objective/train/weighted_lm_loss": 2.99261736869812, + "objective/train/weights_max": 1.0741807222366333, + "objective/train/weights_min": 0.9117754101753235, + "theoretical_loss": 3.4973645531171207, + "tokens_seen": 1594163200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005219903691813804, + "loss": 1.5274, + "theoretical_loss": 3.4972267726267336, + "tokens_seen": 1594884096 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.005359707400202751, + "objective/train/docs_used": 904004, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0787835121154785, + "objective/train/original_loss": 3.0787839889526367, + "objective/train/theoretical_loss": 3.497051530885427, + "objective/train/tokens_used": 1616261600, + "objective/train/value_avg": -0.01194000244140625, + "objective/train/value_loss": 0.0011802270309999585, + "objective/train/value_max": -0.0004916191101074219, + "objective/train/value_min": -0.1715087890625, + "objective/train/value_reward_corr": 0.19548512576213814, + "objective/train/value_std": 0.0118255615234375, + "objective/train/weight_avg": 1.0005416870117188, + "objective/train/weighted_lm_loss": 3.0805444717407227, + "objective/train/weights_max": 1.0172340869903564, + "objective/train/weights_min": 0.9270954728126526, + "theoretical_loss": 3.497051530885427, + "tokens_seen": 1595801600 + }, + { + "epoch": 0.48, + "learning_rate": 0.000521669341894061, + "loss": 1.5227, + "theoretical_loss": 3.4970265068788944, + "tokens_seen": 1595932672 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005213483146067416, + "loss": 1.4954, + "theoretical_loss": 3.4968264094835027, + "tokens_seen": 1596981248 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.00512612285092473, + "objective/train/docs_used": 904558, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6038691997528076, + "objective/train/original_loss": 2.6038694381713867, + "objective/train/theoretical_loss": 3.496738919747588, + "objective/train/tokens_used": 1617900000, + "objective/train/value_avg": -0.01421356201171875, + "objective/train/value_loss": 0.002840283326804638, + "objective/train/value_max": -0.0004107952117919922, + "objective/train/value_min": -0.861328125, + "objective/train/value_reward_corr": 0.47157896319827397, + "objective/train/value_std": 0.0232391357421875, + "objective/train/weight_avg": 1.0005265474319458, + "objective/train/weighted_lm_loss": 2.604412794113159, + "objective/train/weights_max": 1.0332036018371582, + "objective/train/weights_min": 0.9102634191513062, + "theoretical_loss": 3.496738919747588, + "tokens_seen": 1597440000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005210272873194221, + "loss": 1.5062, + "theoretical_loss": 3.4966264801886346, + "tokens_seen": 1598029824 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0030863750725984573, + "objective/train/docs_used": 905731, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.909982919692993, + "objective/train/original_loss": 2.909982681274414, + "objective/train/theoretical_loss": 3.4964267187429066, + "objective/train/tokens_used": 1619538400, + "objective/train/value_avg": -0.0154266357421875, + "objective/train/value_loss": 0.004564074333757162, + "objective/train/value_max": -0.0003845691680908203, + "objective/train/value_min": -0.9697265625, + "objective/train/value_reward_corr": 0.269710880325164, + "objective/train/value_std": 0.0228729248046875, + "objective/train/weight_avg": 1.000330924987793, + "objective/train/weighted_lm_loss": 2.9103736877441406, + "objective/train/weights_max": 1.0281169414520264, + "objective/train/weights_min": 0.9089640974998474, + "theoretical_loss": 3.4964267187429066, + "tokens_seen": 1599078400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005207062600321027, + "loss": 1.508, + "theoretical_loss": 3.4964267187429066, + "tokens_seen": 1599078400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005203852327447834, + "loss": 1.524, + "theoretical_loss": 3.4962271248954755, + "tokens_seen": 1600126976 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0013825345085933805, + "objective/train/docs_used": 906398, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.009138345718384, + "objective/train/original_loss": 3.0091381072998047, + "objective/train/theoretical_loss": 3.4961149269139127, + "objective/train/tokens_used": 1621176800, + "objective/train/value_avg": -0.01445770263671875, + "objective/train/value_loss": 0.0047578043304383755, + "objective/train/value_max": -0.0005593299865722656, + "objective/train/value_min": -0.89990234375, + "objective/train/value_reward_corr": 0.42074014321957914, + "objective/train/value_std": 0.0252685546875, + "objective/train/weight_avg": 1.0001615285873413, + "objective/train/weighted_lm_loss": 3.009337902069092, + "objective/train/weights_max": 1.038977861404419, + "objective/train/weights_min": 0.9071286916732788, + "theoretical_loss": 3.4961149269139127, + "tokens_seen": 1600716800 + }, + { + "epoch": 0.49, + "learning_rate": 0.000520064205457464, + "loss": 1.4965, + "theoretical_loss": 3.4960276983960368, + "tokens_seen": 1601175552 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005197431781701445, + "loss": 1.5283, + "theoretical_loss": 3.495828438994824, + "tokens_seen": 1602224128 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": -0.006493560504168272, + "objective/train/docs_used": 907690, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9259095191955566, + "objective/train/original_loss": 2.9259092807769775, + "objective/train/theoretical_loss": 3.495803543306348, + "objective/train/tokens_used": 1622815200, + "objective/train/value_avg": -0.026031494140625, + "objective/train/value_loss": 0.005362231750041246, + "objective/train/value_max": -0.0006046295166015625, + "objective/train/value_min": -0.90380859375, + "objective/train/value_reward_corr": 0.42807035323006987, + "objective/train/value_std": 0.039825439453125, + "objective/train/weight_avg": 0.9993773102760315, + "objective/train/weighted_lm_loss": 2.926013708114624, + "objective/train/weights_max": 1.0810495615005493, + "objective/train/weights_min": 0.9206888675689697, + "theoretical_loss": 3.495803543306348, + "tokens_seen": 1602355200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005194221508828251, + "loss": 1.4914, + "theoretical_loss": 3.495629346442607, + "tokens_seen": 1603272704 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0037271042820066214, + "objective/train/docs_used": 908320, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.291618824005127, + "objective/train/original_loss": 3.291619300842285, + "objective/train/theoretical_loss": 3.495492566969153, + "objective/train/tokens_used": 1624453600, + "objective/train/value_avg": -0.0220947265625, + "objective/train/value_loss": 0.005011627450585365, + "objective/train/value_max": -0.00028252601623535156, + "objective/train/value_min": -0.51025390625, + "objective/train/value_reward_corr": 0.4505826846521971, + "objective/train/value_std": 0.047882080078125, + "objective/train/weight_avg": 1.0003973245620728, + "objective/train/weighted_lm_loss": 3.292731761932373, + "objective/train/weights_max": 1.0418068170547485, + "objective/train/weights_min": 0.9150294065475464, + "theoretical_loss": 3.495492566969153, + "tokens_seen": 1603993600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005191011235955057, + "loss": 1.4962, + "theoretical_loss": 3.4954304204906896, + "tokens_seen": 1604321280 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005187800963081862, + "loss": 1.4994, + "theoretical_loss": 3.4952316608909078, + "tokens_seen": 1605369856 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.002373072085902095, + "objective/train/docs_used": 909776, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.215195417404175, + "objective/train/original_loss": 3.215195894241333, + "objective/train/theoretical_loss": 3.495181996954453, + "objective/train/tokens_used": 1626092000, + "objective/train/value_avg": -0.0142364501953125, + "objective/train/value_loss": 0.001982445362955332, + "objective/train/value_max": -0.0003654956817626953, + "objective/train/value_min": -0.77490234375, + "objective/train/value_reward_corr": 0.5010147100606979, + "objective/train/value_std": 0.027923583984375, + "objective/train/weight_avg": 1.0002471208572388, + "objective/train/weighted_lm_loss": 3.2171380519866943, + "objective/train/weights_max": 1.0294125080108643, + "objective/train/weights_min": 0.9211341738700867, + "theoretical_loss": 3.495181996954453, + "tokens_seen": 1605632000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005184590690208668, + "loss": 1.4725, + "theoretical_loss": 3.49503306739563, + "tokens_seen": 1606418432 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.004610189702361822, + "objective/train/docs_used": 910493, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7262680530548096, + "objective/train/original_loss": 2.7262680530548096, + "objective/train/theoretical_loss": 3.4948718323175414, + "objective/train/tokens_used": 1627730400, + "objective/train/value_avg": -0.0094146728515625, + "objective/train/value_loss": 0.000635931792203337, + "objective/train/value_max": -0.000263214111328125, + "objective/train/value_min": -0.2281494140625, + "objective/train/value_reward_corr": 0.2460812970489694, + "objective/train/value_std": 0.01043701171875, + "objective/train/weight_avg": 1.0004642009735107, + "objective/train/weighted_lm_loss": 2.727219343185425, + "objective/train/weights_max": 1.0221574306488037, + "objective/train/weights_min": 0.9263097643852234, + "theoretical_loss": 3.4948718323175414, + "tokens_seen": 1607270400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005181380417335474, + "loss": 1.4838, + "theoretical_loss": 3.4948346397577543, + "tokens_seen": 1607467008 + }, + { + "epoch": 0.49, + "learning_rate": 0.000517817014446228, + "loss": 1.5124, + "theoretical_loss": 3.4946363777307075, + "tokens_seen": 1608515584 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.005691151600331068, + "objective/train/docs_used": 911872, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6211695671081543, + "objective/train/original_loss": 2.621169090270996, + "objective/train/theoretical_loss": 3.4945620721168713, + "objective/train/tokens_used": 1629368800, + "objective/train/value_avg": -0.0187530517578125, + "objective/train/value_loss": 0.00361608131788671, + "objective/train/value_max": -0.0007734298706054688, + "objective/train/value_min": -0.96630859375, + "objective/train/value_reward_corr": 0.737098951763503, + "objective/train/value_std": 0.049713134765625, + "objective/train/weight_avg": 1.0005868673324585, + "objective/train/weighted_lm_loss": 2.622729539871216, + "objective/train/weights_max": 1.0667921304702759, + "objective/train/weights_min": 0.9092106819152832, + "theoretical_loss": 3.4945620721168713, + "tokens_seen": 1608908800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005174959871589085, + "loss": 1.474, + "theoretical_loss": 3.494438281068443, + "tokens_seen": 1609564160 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": -0.02283472940325737, + "objective/train/docs_used": 912654, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.4954349994659424, + "objective/train/original_loss": 2.4954349994659424, + "objective/train/theoretical_loss": 3.494252715414037, + "objective/train/tokens_used": 1631007200, + "objective/train/value_avg": -0.0308837890625, + "objective/train/value_loss": 0.011637781746685505, + "objective/train/value_max": -0.0004878044128417969, + "objective/train/value_min": -0.84716796875, + "objective/train/value_reward_corr": 0.8331603305441363, + "objective/train/value_std": 0.076904296875, + "objective/train/weight_avg": 0.9977738857269287, + "objective/train/weighted_lm_loss": 2.4900801181793213, + "objective/train/weights_max": 1.0165554285049438, + "objective/train/weights_min": 0.9341535568237305, + "theoretical_loss": 3.494252715414037, + "tokens_seen": 1610547200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005171749598715891, + "loss": 1.473, + "theoretical_loss": 3.4942403495254393, + "tokens_seen": 1610612736 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005168539325842697, + "loss": 1.4976, + "theoretical_loss": 3.4940425828567, + "tokens_seen": 1611661312 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.004162132274359465, + "objective/train/docs_used": 913987, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.906005382537842, + "objective/train/original_loss": 2.9060051441192627, + "objective/train/theoretical_loss": 3.493943761273761, + "objective/train/tokens_used": 1632645600, + "objective/train/value_avg": -0.01538848876953125, + "objective/train/value_loss": 0.0010290918871760368, + "objective/train/value_max": -0.0005211830139160156, + "objective/train/value_min": -0.66748046875, + "objective/train/value_reward_corr": 0.5284904727215779, + "objective/train/value_std": 0.02630615234375, + "objective/train/weight_avg": 1.000421404838562, + "objective/train/weighted_lm_loss": 2.907554864883423, + "objective/train/weights_max": 1.062092900276184, + "objective/train/weights_min": 0.9549415111541748, + "theoretical_loss": 3.493943761273761, + "tokens_seen": 1612185600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005165329052969502, + "loss": 1.4914, + "theoretical_loss": 3.4938449808177516, + "tokens_seen": 1612709888 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005162118780096309, + "loss": 1.4676, + "theoretical_loss": 3.4936475431646397, + "tokens_seen": 1613758464 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0028234964702278376, + "objective/train/docs_used": 914763, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7714591026306152, + "objective/train/original_loss": 2.7714593410491943, + "objective/train/theoretical_loss": 3.4936352087638842, + "objective/train/tokens_used": 1634284000, + "objective/train/value_avg": -0.02764892578125, + "objective/train/value_loss": 0.005344110075384378, + "objective/train/value_max": -0.0004222393035888672, + "objective/train/value_min": -0.61474609375, + "objective/train/value_reward_corr": 0.6352836716405299, + "objective/train/value_std": 0.07366943359375, + "objective/train/weight_avg": 1.000308632850647, + "objective/train/weighted_lm_loss": 2.771807909011841, + "objective/train/weights_max": 1.0308259725570679, + "objective/train/weights_min": 0.9076417088508606, + "theoretical_loss": 3.4936352087638842, + "tokens_seen": 1613824000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005158908507223115, + "loss": 1.4586, + "theoretical_loss": 3.4934502696539322, + "tokens_seen": 1614807040 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.005166909657418728, + "objective/train/docs_used": 916021, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1177761554718018, + "objective/train/original_loss": 3.1177756786346436, + "objective/train/theoretical_loss": 3.493327056955347, + "objective/train/tokens_used": 1635922400, + "objective/train/value_avg": -0.00917816162109375, + "objective/train/value_loss": 0.001420327345840633, + "objective/train/value_max": -0.00044417381286621094, + "objective/train/value_min": -0.56396484375, + "objective/train/value_reward_corr": 0.18492097458438927, + "objective/train/value_std": 0.0111846923828125, + "objective/train/weight_avg": 1.0005236864089966, + "objective/train/weighted_lm_loss": 3.1192715167999268, + "objective/train/weights_max": 1.0187267065048218, + "objective/train/weights_min": 0.9141699075698853, + "theoretical_loss": 3.493327056955347, + "tokens_seen": 1615462400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005155698234349921, + "loss": 1.4761, + "theoretical_loss": 3.493253160042713, + "tokens_seen": 1615855616 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005152487961476726, + "loss": 1.4783, + "theoretical_loss": 3.4930562140885844, + "tokens_seen": 1616904192 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0026170737110078335, + "objective/train/docs_used": 916782, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.481147050857544, + "objective/train/original_loss": 2.481147289276123, + "objective/train/theoretical_loss": 3.4930193049221785, + "objective/train/tokens_used": 1637560800, + "objective/train/value_avg": -0.012725830078125, + "objective/train/value_loss": 0.002034582896158099, + "objective/train/value_max": -0.0004546642303466797, + "objective/train/value_min": -0.828125, + "objective/train/value_reward_corr": 0.6498834431147285, + "objective/train/value_std": 0.0265045166015625, + "objective/train/weight_avg": 1.0002716779708862, + "objective/train/weighted_lm_loss": 2.482081174850464, + "objective/train/weights_max": 1.0278986692428589, + "objective/train/weights_min": 0.9137925505638123, + "theoretical_loss": 3.4930193049221785, + "tokens_seen": 1617100800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005149277688603532, + "loss": 1.4765, + "theoretical_loss": 3.492859431549663, + "tokens_seen": 1617952768 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": -0.0009170277626253664, + "objective/train/docs_used": 918149, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9232492446899414, + "objective/train/original_loss": 2.9232497215270996, + "objective/train/theoretical_loss": 3.4927119517414846, + "objective/train/tokens_used": 1639199200, + "objective/train/value_avg": -0.011383056640625, + "objective/train/value_loss": 0.0024784186389297247, + "objective/train/value_max": -0.0003936290740966797, + "objective/train/value_min": -0.260009765625, + "objective/train/value_reward_corr": 0.3003653885118827, + "objective/train/value_std": 0.012908935546875, + "objective/train/weight_avg": 0.9999205470085144, + "objective/train/weighted_lm_loss": 2.923266887664795, + "objective/train/weights_max": 1.0198267698287964, + "objective/train/weights_min": 0.9384546875953674, + "theoretical_loss": 3.4927119517414846, + "tokens_seen": 1618739200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005146067415730337, + "loss": 1.4666, + "theoretical_loss": 3.49266281218458, + "tokens_seen": 1619001344 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005142857142857142, + "loss": 1.4676, + "theoretical_loss": 3.49246635575248, + "tokens_seen": 1620049920 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.006086295936256647, + "objective/train/docs_used": 918561, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0101518630981445, + "objective/train/original_loss": 3.0101518630981445, + "objective/train/theoretical_loss": 3.4924049964934314, + "objective/train/tokens_used": 1640837600, + "objective/train/value_avg": -0.017181396484375, + "objective/train/value_loss": 0.0017239656299352646, + "objective/train/value_max": -0.0003447532653808594, + "objective/train/value_min": -0.7041015625, + "objective/train/value_reward_corr": 0.598717211337185, + "objective/train/value_std": 0.03839111328125, + "objective/train/weight_avg": 1.0006171464920044, + "objective/train/weighted_lm_loss": 3.012176513671875, + "objective/train/weights_max": 1.0399199724197388, + "objective/train/weights_min": 0.9333854913711548, + "theoretical_loss": 3.4924049964934314, + "tokens_seen": 1620377600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005139646869983948, + "loss": 1.4552, + "theoretical_loss": 3.4922700620130174, + "tokens_seen": 1621098496 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0011278983438387513, + "objective/train/docs_used": 919775, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.927858352661133, + "objective/train/original_loss": 2.927858829498291, + "objective/train/theoretical_loss": 3.4920984382612357, + "objective/train/tokens_used": 1642476000, + "objective/train/value_avg": -0.01351165771484375, + "objective/train/value_loss": 0.0028033358976244926, + "objective/train/value_max": -0.0004076957702636719, + "objective/train/value_min": -0.90869140625, + "objective/train/value_reward_corr": 0.46630148724500375, + "objective/train/value_std": 0.027557373046875, + "objective/train/weight_avg": 1.000126600265503, + "objective/train/weighted_lm_loss": 2.9278523921966553, + "objective/train/weights_max": 1.0419307947158813, + "objective/train/weights_min": 0.9290220141410828, + "theoretical_loss": 3.4920984382612357, + "tokens_seen": 1622016000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005136436597110754, + "loss": 1.4974, + "theoretical_loss": 3.492073930726355, + "tokens_seen": 1622147072 + }, + { + "epoch": 0.49, + "learning_rate": 0.000513322632423756, + "loss": 1.4836, + "theoretical_loss": 3.491877961653168, + "tokens_seen": 1623195648 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0019478084286674857, + "objective/train/docs_used": 920322, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.867671251296997, + "objective/train/original_loss": 2.867671251296997, + "objective/train/theoretical_loss": 3.491792276131147, + "objective/train/tokens_used": 1644114400, + "objective/train/value_avg": -0.0158538818359375, + "objective/train/value_loss": 0.0029206927865743637, + "objective/train/value_max": -0.00036263465881347656, + "objective/train/value_min": -0.91845703125, + "objective/train/value_reward_corr": 0.7347736236205545, + "objective/train/value_std": 0.04608154296875, + "objective/train/weight_avg": 1.0002092123031616, + "objective/train/weighted_lm_loss": 2.8686468601226807, + "objective/train/weights_max": 1.0679152011871338, + "objective/train/weights_min": 0.9181533455848694, + "theoretical_loss": 3.491792276131147, + "tokens_seen": 1623654400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005130016051364365, + "loss": 1.455, + "theoretical_loss": 3.4916821545546344, + "tokens_seen": 1624244224 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.005081588868051767, + "objective/train/docs_used": 921030, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.666759729385376, + "objective/train/original_loss": 2.6667604446411133, + "objective/train/theoretical_loss": 3.4914865091924394, + "objective/train/tokens_used": 1645752800, + "objective/train/value_avg": -0.016571044921875, + "objective/train/value_loss": 0.0015545723726972938, + "objective/train/value_max": -0.0008492469787597656, + "objective/train/value_min": -0.88720703125, + "objective/train/value_reward_corr": 0.3787136647661971, + "objective/train/value_std": 0.02520751953125, + "objective/train/weight_avg": 1.0005158185958862, + "objective/train/weighted_lm_loss": 2.6677563190460205, + "objective/train/weights_max": 1.0420008897781372, + "objective/train/weights_min": 0.9206088185310364, + "theoretical_loss": 3.4914865091924394, + "tokens_seen": 1625292800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005126805778491171, + "loss": 1.4264, + "theoretical_loss": 3.4914865091924394, + "tokens_seen": 1625292800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005123595505617977, + "loss": 1.4879, + "theoretical_loss": 3.4912910253287732, + "tokens_seen": 1626341376 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0023169072810560465, + "objective/train/docs_used": 922470, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.745408773422241, + "objective/train/original_loss": 2.7454090118408203, + "objective/train/theoretical_loss": 3.4911811365373957, + "objective/train/tokens_used": 1647391200, + "objective/train/value_avg": -0.0100860595703125, + "objective/train/value_loss": 0.0008465351420454681, + "objective/train/value_max": -0.0005617141723632812, + "objective/train/value_min": -0.105224609375, + "objective/train/value_reward_corr": 0.4072242159314371, + "objective/train/value_std": 0.0095672607421875, + "objective/train/weight_avg": 1.000235915184021, + "objective/train/weighted_lm_loss": 2.7466351985931396, + "objective/train/weights_max": 1.0091441869735718, + "objective/train/weights_min": 0.9305450916290283, + "theoretical_loss": 3.4911811365373957, + "tokens_seen": 1626931200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005120385232744784, + "loss": 1.4432, + "theoretical_loss": 3.4910957027263274, + "tokens_seen": 1627389952 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005117174959871589, + "loss": 1.4655, + "theoretical_loss": 3.490900541148295, + "tokens_seen": 1628438528 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.006120888516306877, + "objective/train/docs_used": 922615, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1333234310150146, + "objective/train/original_loss": 3.1333227157592773, + "objective/train/theoretical_loss": 3.4908761572612947, + "objective/train/tokens_used": 1649029600, + "objective/train/value_avg": -0.0106201171875, + "objective/train/value_loss": 0.0011693714186549187, + "objective/train/value_max": -0.00037407875061035156, + "objective/train/value_min": -0.78662109375, + "objective/train/value_reward_corr": 0.3206277798272567, + "objective/train/value_std": 0.016265869140625, + "objective/train/weight_avg": 1.0006178617477417, + "objective/train/weighted_lm_loss": 3.1353225708007812, + "objective/train/weights_max": 1.0508941411972046, + "objective/train/weights_min": 0.9224183559417725, + "theoretical_loss": 3.4908761572612947, + "tokens_seen": 1628569600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005113964686998395, + "loss": 1.4754, + "theoretical_loss": 3.490705540358369, + "tokens_seen": 1629487104 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.002974078059196472, + "objective/train/docs_used": 923942, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1235127449035645, + "objective/train/original_loss": 3.1235129833221436, + "objective/train/theoretical_loss": 3.490571570462399, + "objective/train/tokens_used": 1650668000, + "objective/train/value_avg": -0.016937255859375, + "objective/train/value_loss": 0.004323026165366173, + "objective/train/value_max": -0.00034332275390625, + "objective/train/value_min": -0.7744140625, + "objective/train/value_reward_corr": 0.3414356190928103, + "objective/train/value_std": 0.0275115966796875, + "objective/train/weight_avg": 1.0003186464309692, + "objective/train/weighted_lm_loss": 3.124218463897705, + "objective/train/weights_max": 1.0352452993392944, + "objective/train/weights_min": 0.913008987903595, + "theoretical_loss": 3.490571570462399, + "tokens_seen": 1630208000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005110754414125201, + "loss": 1.4975, + "theoretical_loss": 3.4905107001207414, + "tokens_seen": 1630535680 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005107544141252006, + "loss": 1.5064, + "theoretical_loss": 3.490316020200101, + "tokens_seen": 1631584256 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.01909417286515236, + "objective/train/docs_used": 924504, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.964604139328003, + "objective/train/original_loss": 2.964604139328003, + "objective/train/theoretical_loss": 3.4902673752419417, + "objective/train/tokens_used": 1652306400, + "objective/train/value_avg": -0.033203125, + "objective/train/value_loss": 0.003335321554914117, + "objective/train/value_max": -0.00046563148498535156, + "objective/train/value_min": -0.6455078125, + "objective/train/value_reward_corr": 0.8164566363994501, + "objective/train/value_std": 0.0889892578125, + "objective/train/weight_avg": 1.0019261837005615, + "objective/train/weighted_lm_loss": 2.970813274383545, + "objective/train/weights_max": 1.0557302236557007, + "objective/train/weights_min": 0.9785954356193542, + "theoretical_loss": 3.4902673752419417, + "tokens_seen": 1631846400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005104333868378812, + "loss": 1.4698, + "theoretical_loss": 3.4901215003616333, + "tokens_seen": 1632632832 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": -0.001691404148004949, + "objective/train/docs_used": 925849, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.066025972366333, + "objective/train/original_loss": 3.066025972366333, + "objective/train/theoretical_loss": 3.489963570704114, + "objective/train/tokens_used": 1653944800, + "objective/train/value_avg": -0.0197296142578125, + "objective/train/value_loss": 0.006206407677382231, + "objective/train/value_max": -0.0002532005310058594, + "objective/train/value_min": -0.970703125, + "objective/train/value_reward_corr": 0.8176306750167753, + "objective/train/value_std": 0.0634765625, + "objective/train/weight_avg": 0.9998612999916077, + "objective/train/weighted_lm_loss": 3.0663301944732666, + "objective/train/weights_max": 1.038800597190857, + "objective/train/weights_min": 0.9086558222770691, + "theoretical_loss": 3.489963570704114, + "tokens_seen": 1633484800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005101123595505618, + "loss": 1.4815, + "theoretical_loss": 3.4899271403710164, + "tokens_seen": 1633681408 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005097913322632424, + "loss": 1.4642, + "theoretical_loss": 3.4897329399944237, + "tokens_seen": 1634729984 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": -0.006539641413837671, + "objective/train/docs_used": 926453, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.304609537124634, + "objective/train/original_loss": 3.304610013961792, + "objective/train/theoretical_loss": 3.4896601559560523, + "objective/train/tokens_used": 1655583200, + "objective/train/value_avg": -0.01268768310546875, + "objective/train/value_loss": 0.007315913215279579, + "objective/train/value_max": -0.0003418922424316406, + "objective/train/value_min": -0.5126953125, + "objective/train/value_reward_corr": 0.25756152826426737, + "objective/train/value_std": 0.0200653076171875, + "objective/train/weight_avg": 0.999381959438324, + "objective/train/weighted_lm_loss": 3.302457571029663, + "objective/train/weights_max": 1.0375921726226807, + "objective/train/weights_min": 0.9171932935714722, + "theoretical_loss": 3.4896601559560523, + "tokens_seen": 1635123200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005094703049759229, + "loss": 1.507, + "theoretical_loss": 3.4895388989985188, + "tokens_seen": 1635778560 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": -0.01486304122954607, + "objective/train/docs_used": 927487, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9844088554382324, + "objective/train/original_loss": 2.9844086170196533, + "objective/train/theoretical_loss": 3.4893571301078246, + "objective/train/tokens_used": 1657221600, + "objective/train/value_avg": -0.0540771484375, + "objective/train/value_loss": 0.011326334439218044, + "objective/train/value_max": -0.0005617141723632812, + "objective/train/value_min": -0.962890625, + "objective/train/value_reward_corr": 0.8708790130616655, + "objective/train/value_std": 0.142822265625, + "objective/train/weight_avg": 0.9985693693161011, + "objective/train/weighted_lm_loss": 2.980548858642578, + "objective/train/weights_max": 1.085039734840393, + "objective/train/weights_min": 0.9084038138389587, + "theoretical_loss": 3.4893571301078246, + "tokens_seen": 1636761600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005091492776886035, + "loss": 1.5013, + "theoretical_loss": 3.4893450171504563, + "tokens_seen": 1636827136 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005088282504012841, + "loss": 1.4792, + "theoretical_loss": 3.4891512942178795, + "tokens_seen": 1637875712 + }, + { + "debugging/Self-BLEU-5": 0.299523828417223, + "debugging/distinct-1-grams": 0.7931298937164184, + "debugging/distinct-2-grams": 0.9722027972027973, + "debugging/entropy-1-grams": 5.372256762784476, + "debugging/entropy-2-grams": 5.925287792100262, + "debugging/length": 545.4, + "debugging/num_segments": 5, + "debugging/raw_token_scores_avg": 0.009862919338047504, + "debugging/raw_token_scores_std": 0.056920722126960754, + "epoch": 0.5, + "objective/train/advantage_avg": 0.005891862325370312, + "objective/train/docs_used": 928998, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2001116275787354, + "objective/train/original_loss": 3.2001118659973145, + "objective/train/theoretical_loss": 3.4890544922724205, + "objective/train/tokens_used": 1658860000, + "objective/train/value_avg": -0.0157623291015625, + "objective/train/value_loss": 0.002798998262733221, + "objective/train/value_max": -0.0005745887756347656, + "objective/train/value_min": -0.90234375, + "objective/train/value_reward_corr": 0.4042665634959788, + "objective/train/value_std": 0.030242919921875, + "objective/train/weight_avg": 1.0006029605865479, + "objective/train/weighted_lm_loss": 3.2020041942596436, + "objective/train/weights_max": 1.0550137758255005, + "objective/train/weights_min": 0.9297263622283936, + "theoretical_loss": 3.4890544922724205, + "tokens_seen": 1638400000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005085072231139646, + "loss": 1.4994, + "theoretical_loss": 3.48895772996892, + "tokens_seen": 1638924288 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005081861958266453, + "loss": 1.5089, + "theoretical_loss": 3.4887643241721955, + "tokens_seen": 1639972864 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0075013949535787106, + "objective/train/docs_used": 929588, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.010909080505371, + "objective/train/original_loss": 3.010908603668213, + "objective/train/theoretical_loss": 3.488752241565736, + "objective/train/tokens_used": 1660498400, + "objective/train/value_avg": -0.028289794921875, + "objective/train/value_loss": 0.0027737796772271395, + "objective/train/value_max": -0.0002694129943847656, + "objective/train/value_min": -0.970703125, + "objective/train/value_reward_corr": 0.6693262813085714, + "objective/train/value_std": 0.05670166015625, + "objective/train/weight_avg": 1.0007638931274414, + "objective/train/weighted_lm_loss": 3.012160301208496, + "objective/train/weights_max": 1.0372587442398071, + "objective/train/weights_min": 0.92678302526474, + "theoretical_loss": 3.488752241565736, + "tokens_seen": 1640038400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005078651685393259, + "loss": 1.4867, + "theoretical_loss": 3.4885710765968088, + "tokens_seen": 1641021440 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": -0.0014422002714127302, + "objective/train/docs_used": 930445, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0675594806671143, + "objective/train/original_loss": 3.067559242248535, + "objective/train/theoretical_loss": 3.4884503771065636, + "objective/train/tokens_used": 1662136800, + "objective/train/value_avg": -0.01479339599609375, + "objective/train/value_loss": 0.0030291173607110977, + "objective/train/value_max": -0.00046372413635253906, + "objective/train/value_min": -0.8876953125, + "objective/train/value_reward_corr": 0.48609839554819256, + "objective/train/value_std": 0.0264739990234375, + "objective/train/weight_avg": 0.999870777130127, + "objective/train/weighted_lm_loss": 3.0669078826904297, + "objective/train/weights_max": 1.042985439300537, + "objective/train/weights_min": 0.9338280558586121, + "theoretical_loss": 3.4884503771065636, + "tokens_seen": 1641676800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005075441412520065, + "loss": 1.4771, + "theoretical_loss": 3.4883779870123455, + "tokens_seen": 1642070016 + }, + { + "epoch": 0.5, + "learning_rate": 0.000507223113964687, + "loss": 1.4953, + "theoretical_loss": 3.488185055188876, + "tokens_seen": 1643118592 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.00038371546543203294, + "objective/train/docs_used": 931444, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0905604362487793, + "objective/train/original_loss": 3.0905606746673584, + "objective/train/theoretical_loss": 3.4881488980165765, + "objective/train/tokens_used": 1663775200, + "objective/train/value_avg": -0.01543426513671875, + "objective/train/value_loss": 0.0030876845121383667, + "objective/train/value_max": -0.00047659873962402344, + "objective/train/value_min": -0.73681640625, + "objective/train/value_reward_corr": 0.4477117015471803, + "objective/train/value_std": 0.0212249755859375, + "objective/train/weight_avg": 1.0000535249710083, + "objective/train/weighted_lm_loss": 3.090200662612915, + "objective/train/weights_max": 1.0346497297286987, + "objective/train/weights_min": 0.9127119183540344, + "theoretical_loss": 3.4881488980165765, + "tokens_seen": 1643315200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005069020866773676, + "loss": 1.4725, + "theoretical_loss": 3.4879922808969486, + "tokens_seen": 1644167168 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.001529604778625071, + "objective/train/docs_used": 932028, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5140841007232666, + "objective/train/original_loss": 2.5140838623046875, + "objective/train/theoretical_loss": 3.48784780342032, + "objective/train/tokens_used": 1665413600, + "objective/train/value_avg": -0.018218994140625, + "objective/train/value_loss": 0.0038934529293328524, + "objective/train/value_max": -0.0005593299865722656, + "objective/train/value_min": -0.80810546875, + "objective/train/value_reward_corr": 0.33501616449447147, + "objective/train/value_std": 0.02899169921875, + "objective/train/weight_avg": 1.0001722574234009, + "objective/train/weighted_lm_loss": 2.5153191089630127, + "objective/train/weights_max": 1.08120596408844, + "objective/train/weights_min": 0.913263201713562, + "theoretical_loss": 3.48784780342032, + "tokens_seen": 1644953600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005065810593900482, + "loss": 1.4568, + "theoretical_loss": 3.4877996639075937, + "tokens_seen": 1645215744 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005062600321027287, + "loss": 1.4536, + "theoretical_loss": 3.4876072039923196, + "tokens_seen": 1646264320 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.006334267556667328, + "objective/train/docs_used": 933210, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.067915201187134, + "objective/train/original_loss": 3.067915201187134, + "objective/train/theoretical_loss": 3.487547092445197, + "objective/train/tokens_used": 1667052000, + "objective/train/value_avg": -0.01358795166015625, + "objective/train/value_loss": 0.002378677250817418, + "objective/train/value_max": -0.0003905296325683594, + "objective/train/value_min": -0.97265625, + "objective/train/value_reward_corr": 0.49590571507408293, + "objective/train/value_std": 0.0299072265625, + "objective/train/weight_avg": 1.0006451606750488, + "objective/train/weighted_lm_loss": 3.069646120071411, + "objective/train/weights_max": 1.099200963973999, + "objective/train/weights_min": 0.9076157212257385, + "theoretical_loss": 3.487547092445197, + "tokens_seen": 1646592000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005059390048154093, + "loss": 1.4852, + "theoretical_loss": 3.4874149009231123, + "tokens_seen": 1647312896 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.009625314734876156, + "objective/train/docs_used": 933788, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.437960624694824, + "objective/train/original_loss": 2.437960386276245, + "objective/train/theoretical_loss": 3.4872467642214566, + "objective/train/tokens_used": 1668690400, + "objective/train/value_avg": -0.01922607421875, + "objective/train/value_loss": 0.0013617334188893437, + "objective/train/value_max": -0.0003542900085449219, + "objective/train/value_min": -0.50732421875, + "objective/train/value_reward_corr": 0.4690699657494969, + "objective/train/value_std": 0.0276336669921875, + "objective/train/weight_avg": 1.0009692907333374, + "objective/train/weighted_lm_loss": 2.440439224243164, + "objective/train/weights_max": 1.031800627708435, + "objective/train/weights_min": 0.9325525164604187, + "theoretical_loss": 3.4872467642214566, + "tokens_seen": 1648230400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005056179775280899, + "loss": 1.4743, + "theoretical_loss": 3.4872227544724312, + "tokens_seen": 1648361472 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005052969502407705, + "loss": 1.4681, + "theoretical_loss": 3.487030764413214, + "tokens_seen": 1649410048 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.004136300645768642, + "objective/train/docs_used": 934285, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7398128509521484, + "objective/train/original_loss": 2.739812135696411, + "objective/train/theoretical_loss": 3.486946817882181, + "objective/train/tokens_used": 1670328800, + "objective/train/value_avg": -0.033477783203125, + "objective/train/value_loss": 0.005715727340430021, + "objective/train/value_max": -0.00039196014404296875, + "objective/train/value_min": -0.99072265625, + "objective/train/value_reward_corr": 0.562126906721049, + "objective/train/value_std": 0.06494140625, + "objective/train/weight_avg": 1.0004417896270752, + "objective/train/weighted_lm_loss": 2.7409093379974365, + "objective/train/weights_max": 1.0816709995269775, + "objective/train/weights_min": 0.9078468084335327, + "theoretical_loss": 3.486946817882181, + "tokens_seen": 1649868800 + }, + { + "epoch": 0.5, + "learning_rate": 0.000504975922953451, + "loss": 1.4819, + "theoretical_loss": 3.4868389305188687, + "tokens_seen": 1650458624 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.005100604612380266, + "objective/train/docs_used": 935410, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2894294261932373, + "objective/train/original_loss": 3.2894296646118164, + "objective/train/theoretical_loss": 3.4866472525632766, + "objective/train/tokens_used": 1671967200, + "objective/train/value_avg": -0.01084136962890625, + "objective/train/value_loss": 0.0005245616775937378, + "objective/train/value_max": -0.0002415180206298828, + "objective/train/value_min": -0.2398681640625, + "objective/train/value_reward_corr": 0.2511065024596255, + "objective/train/value_std": 0.01143646240234375, + "objective/train/weight_avg": 1.0005125999450684, + "objective/train/weighted_lm_loss": 3.2909910678863525, + "objective/train/weights_max": 1.020742416381836, + "objective/train/weights_min": 0.9614627361297607, + "theoretical_loss": 3.4866472525632766, + "tokens_seen": 1651507200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005046548956661316, + "loss": 1.4806, + "theoretical_loss": 3.4866472525632766, + "tokens_seen": 1651507200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005043338683788123, + "loss": 1.4982, + "theoretical_loss": 3.486455730320789, + "tokens_seen": 1652555776 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": -0.0011591310612857342, + "objective/train/docs_used": 935942, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5945160388946533, + "objective/train/original_loss": 2.5945160388946533, + "objective/train/theoretical_loss": 3.486348067403457, + "objective/train/tokens_used": 1673605600, + "objective/train/value_avg": -0.028961181640625, + "objective/train/value_loss": 0.010482650250196457, + "objective/train/value_max": -0.0005507469177246094, + "objective/train/value_min": -0.98828125, + "objective/train/value_reward_corr": 0.7195489544455773, + "objective/train/value_std": 0.09869384765625, + "objective/train/weight_avg": 0.9999358057975769, + "objective/train/weighted_lm_loss": 2.5934271812438965, + "objective/train/weights_max": 1.0941760540008545, + "objective/train/weights_min": 0.906700849533081, + "theoretical_loss": 3.486348067403457, + "tokens_seen": 1653145600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005040128410914929, + "loss": 1.4828, + "theoretical_loss": 3.486264363566228, + "tokens_seen": 1653604352 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005036918138041734, + "loss": 1.4396, + "theoretical_loss": 3.4860731520748827, + "tokens_seen": 1654652928 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0011488988529890776, + "objective/train/docs_used": 937360, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.053104877471924, + "objective/train/original_loss": 3.0531046390533447, + "objective/train/theoretical_loss": 3.4860492615442356, + "objective/train/tokens_used": 1675244000, + "objective/train/value_avg": -0.0147552490234375, + "objective/train/value_loss": 0.002192664658650756, + "objective/train/value_max": -0.0003800392150878906, + "objective/train/value_min": -0.295166015625, + "objective/train/value_reward_corr": 0.46067385335070776, + "objective/train/value_std": 0.02166748046875, + "objective/train/weight_avg": 1.000125765800476, + "objective/train/weighted_lm_loss": 3.053474187850952, + "objective/train/weights_max": 1.025507926940918, + "objective/train/weights_min": 0.9475002288818359, + "theoretical_loss": 3.4860492615442356, + "tokens_seen": 1654784000 + }, + { + "epoch": 0.5, + "learning_rate": 0.000503370786516854, + "loss": 1.4377, + "theoretical_loss": 3.4858820956225083, + "tokens_seen": 1655701504 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0014618858695030212, + "objective/train/docs_used": 938134, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.4203341007232666, + "objective/train/original_loss": 3.4203343391418457, + "objective/train/theoretical_loss": 3.4857508341299117, + "objective/train/tokens_used": 1676882400, + "objective/train/value_avg": -0.015625, + "objective/train/value_loss": 0.0031566214747726917, + "objective/train/value_max": -0.00034999847412109375, + "objective/train/value_min": -0.8564453125, + "objective/train/value_reward_corr": 0.36270495632763977, + "objective/train/value_std": 0.0283660888671875, + "objective/train/weight_avg": 1.0001617670059204, + "objective/train/weighted_lm_loss": 3.4206762313842773, + "objective/train/weights_max": 1.08404541015625, + "objective/train/weights_min": 0.9063399434089661, + "theoretical_loss": 3.4857508341299117, + "tokens_seen": 1656422400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005030497592295346, + "loss": 1.4846, + "theoretical_loss": 3.4856911939853283, + "tokens_seen": 1656750080 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005027287319422151, + "loss": 1.5054, + "theoretical_loss": 3.485500446940028, + "tokens_seen": 1657798656 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0060430108569562435, + "objective/train/docs_used": 939068, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1128764152526855, + "objective/train/original_loss": 3.1128761768341064, + "objective/train/theoretical_loss": 3.485452784307559, + "objective/train/tokens_used": 1678520800, + "objective/train/value_avg": -0.048187255859375, + "objective/train/value_loss": 0.009888079017400742, + "objective/train/value_max": -0.0005211830139160156, + "objective/train/value_min": -0.9775390625, + "objective/train/value_reward_corr": 0.6990794956345723, + "objective/train/value_std": 0.1051025390625, + "objective/train/weight_avg": 1.0006530284881592, + "objective/train/weighted_lm_loss": 3.1149308681488037, + "objective/train/weights_max": 1.0726133584976196, + "objective/train/weights_min": 0.9076986312866211, + "theoretical_loss": 3.485452784307559, + "tokens_seen": 1658060800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005024077046548957, + "loss": 1.5013, + "theoretical_loss": 3.4853098542637566, + "tokens_seen": 1658847232 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": -0.007393859326839447, + "objective/train/docs_used": 939826, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1327919960021973, + "objective/train/original_loss": 3.1327919960021973, + "objective/train/theoretical_loss": 3.4851551112270127, + "objective/train/tokens_used": 1680159200, + "objective/train/value_avg": -0.0251312255859375, + "objective/train/value_loss": 0.007353499997407198, + "objective/train/value_max": -0.0004992485046386719, + "objective/train/value_min": -0.91943359375, + "objective/train/value_reward_corr": 0.6032219134360434, + "objective/train/value_std": 0.05364990234375, + "objective/train/weight_avg": 0.9992967844009399, + "objective/train/weighted_lm_loss": 3.1298909187316895, + "objective/train/weights_max": 1.077232003211975, + "objective/train/weights_min": 0.9088017344474792, + "theoretical_loss": 3.4851551112270127, + "tokens_seen": 1659699200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005020866773675763, + "loss": 1.4687, + "theoretical_loss": 3.4851194157341263, + "tokens_seen": 1659895808 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005017656500802569, + "loss": 1.4986, + "theoretical_loss": 3.484929131129207, + "tokens_seen": 1660944384 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.00705959927290678, + "objective/train/docs_used": 941080, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.199772357940674, + "objective/train/original_loss": 3.1997721195220947, + "objective/train/theoretical_loss": 3.4848578140408613, + "objective/train/tokens_used": 1681797600, + "objective/train/value_avg": -0.0372314453125, + "objective/train/value_loss": 0.009380013681948185, + "objective/train/value_max": -0.0002532005310058594, + "objective/train/value_min": -0.9609375, + "objective/train/value_reward_corr": 0.5666295094806989, + "objective/train/value_std": 0.08758544921875, + "objective/train/weight_avg": 1.0007524490356445, + "objective/train/weighted_lm_loss": 3.202082872390747, + "objective/train/weights_max": 1.0543525218963623, + "objective/train/weights_min": 0.9091261029243469, + "theoretical_loss": 3.4848578140408613, + "tokens_seen": 1661337600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005014446227929374, + "loss": 1.4719, + "theoretical_loss": 3.484739000227532, + "tokens_seen": 1661992960 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0024663396179676056, + "objective/train/docs_used": 941634, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.196101665496826, + "objective/train/original_loss": 3.1961019039154053, + "objective/train/theoretical_loss": 3.4845608919044304, + "objective/train/tokens_used": 1683436000, + "objective/train/value_avg": -0.012237548828125, + "objective/train/value_loss": 0.0036586574278771877, + "objective/train/value_max": -0.00033926963806152344, + "objective/train/value_min": -0.9755859375, + "objective/train/value_reward_corr": 0.6343743278661391, + "objective/train/value_std": 0.0364990234375, + "objective/train/weight_avg": 1.0002645254135132, + "objective/train/weighted_lm_loss": 3.1962857246398926, + "objective/train/weights_max": 1.0461291074752808, + "objective/train/weights_min": 0.9074264168739319, + "theoretical_loss": 3.4845608919044304, + "tokens_seen": 1662976000 + }, + { + "epoch": 0.5, + "learning_rate": 0.000501123595505618, + "loss": 1.4889, + "theoretical_loss": 3.48454902280809, + "tokens_seen": 1663041536 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005008025682182986, + "loss": 1.5291, + "theoretical_loss": 3.484359198650326, + "tokens_seen": 1664090112 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0027365998830646276, + "objective/train/docs_used": 943199, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.234862804412842, + "objective/train/original_loss": 3.23486328125, + "objective/train/theoretical_loss": 3.4842643439757754, + "objective/train/tokens_used": 1685074400, + "objective/train/value_avg": -0.00844573974609375, + "objective/train/value_loss": 0.0019241574918851256, + "objective/train/value_max": -0.0003829002380371094, + "objective/train/value_min": -0.98046875, + "objective/train/value_reward_corr": 0.35799632444314466, + "objective/train/value_std": 0.0242462158203125, + "objective/train/weight_avg": 1.000283122062683, + "objective/train/weighted_lm_loss": 3.2358689308166504, + "objective/train/weights_max": 1.0610893964767456, + "objective/train/weights_min": 0.9060874581336975, + "theoretical_loss": 3.4842643439757754, + "tokens_seen": 1664614400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005004815409309791, + "loss": 1.4949, + "theoretical_loss": 3.484169527534143, + "tokens_seen": 1665138688 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005001605136436598, + "loss": 1.4426, + "theoretical_loss": 3.4839800092398967, + "tokens_seen": 1666187264 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.012588986195623875, + "objective/train/docs_used": 943856, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.000092029571533, + "objective/train/original_loss": 3.0000922679901123, + "objective/train/theoretical_loss": 3.4839681694156663, + "objective/train/tokens_used": 1686712800, + "objective/train/value_avg": -0.02978515625, + "objective/train/value_loss": 0.0028915267903357744, + "objective/train/value_max": -0.0004992485046386719, + "objective/train/value_min": -0.572265625, + "objective/train/value_reward_corr": 0.6200259910059966, + "objective/train/value_std": 0.062347412109375, + "objective/train/weight_avg": 1.0012733936309814, + "objective/train/weighted_lm_loss": 3.0042171478271484, + "objective/train/weights_max": 1.0541728734970093, + "objective/train/weights_min": 0.943401038646698, + "theoretical_loss": 3.4839681694156663, + "tokens_seen": 1666252800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004998394863563403, + "loss": 1.4505, + "theoretical_loss": 3.4837906435483967, + "tokens_seen": 1667235840 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.007167427334934473, + "objective/train/docs_used": 945126, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0356602668762207, + "objective/train/original_loss": 3.035660743713379, + "objective/train/theoretical_loss": 3.4836723673875793, + "objective/train/tokens_used": 1688351200, + "objective/train/value_avg": -0.0188446044921875, + "objective/train/value_loss": 0.0029941601678729057, + "objective/train/value_max": -0.0004372596740722656, + "objective/train/value_min": -0.89013671875, + "objective/train/value_reward_corr": 0.5783564445720766, + "objective/train/value_std": 0.037322998046875, + "objective/train/weight_avg": 1.0007314682006836, + "objective/train/weighted_lm_loss": 3.0375006198883057, + "objective/train/weights_max": 1.0388540029525757, + "objective/train/weights_min": 0.9245582818984985, + "theoretical_loss": 3.4836723673875793, + "tokens_seen": 1667891200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004995184590690209, + "loss": 1.4696, + "theoretical_loss": 3.4836014302409053, + "tokens_seen": 1668284416 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004991974317817015, + "loss": 1.4512, + "theoretical_loss": 3.4834123690991334, + "tokens_seen": 1669332992 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.008454298600554466, + "objective/train/docs_used": 945699, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7502992153167725, + "objective/train/original_loss": 2.7502992153167725, + "objective/train/theoretical_loss": 3.4833769370576846, + "objective/train/tokens_used": 1689989600, + "objective/train/value_avg": -0.01727294921875, + "objective/train/value_loss": 0.0006038036081008613, + "objective/train/value_max": -0.00031757354736328125, + "objective/train/value_min": -0.42333984375, + "objective/train/value_reward_corr": 0.4509773045317801, + "objective/train/value_std": 0.02105712890625, + "objective/train/weight_avg": 1.000848412513733, + "objective/train/weighted_lm_loss": 2.752190351486206, + "objective/train/weights_max": 1.0430610179901123, + "objective/train/weights_min": 0.9551413655281067, + "theoretical_loss": 3.4833769370576846, + "tokens_seen": 1669529600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004988764044943821, + "loss": 1.4886, + "theoretical_loss": 3.4832234599052434, + "tokens_seen": 1670381568 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.002837319392710924, + "objective/train/docs_used": 946326, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.103632688522339, + "objective/train/original_loss": 3.103632688522339, + "objective/train/theoretical_loss": 3.4830818775948327, + "objective/train/tokens_used": 1691628000, + "objective/train/value_avg": -0.01125335693359375, + "objective/train/value_loss": 0.002730586798861623, + "objective/train/value_max": -0.0005908012390136719, + "objective/train/value_min": -0.8330078125, + "objective/train/value_reward_corr": 0.21090303558413154, + "objective/train/value_std": 0.0189056396484375, + "objective/train/weight_avg": 1.00029718875885, + "objective/train/weighted_lm_loss": 3.1049294471740723, + "objective/train/weights_max": 1.0356985330581665, + "objective/train/weights_min": 0.9078012108802795, + "theoretical_loss": 3.4830818775948327, + "tokens_seen": 1671168000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004985553772070626, + "loss": 1.4762, + "theoretical_loss": 3.483034702441845, + "tokens_seen": 1671430144 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004982343499197432, + "loss": 1.4907, + "theoretical_loss": 3.4828460964919965, + "tokens_seen": 1672478720 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.00030682721990160644, + "objective/train/docs_used": 947668, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.670909881591797, + "objective/train/original_loss": 2.670910120010376, + "objective/train/theoretical_loss": 3.482787188170547, + "objective/train/tokens_used": 1693266400, + "objective/train/value_avg": -0.0170440673828125, + "objective/train/value_loss": 0.007754676975309849, + "objective/train/value_max": -0.0004012584686279297, + "objective/train/value_min": -0.8828125, + "objective/train/value_reward_corr": 0.6336828339191556, + "objective/train/value_std": 0.048431396484375, + "objective/train/weight_avg": 1.0000685453414917, + "objective/train/weighted_lm_loss": 2.670686960220337, + "objective/train/weights_max": 1.0507326126098633, + "objective/train/weights_min": 0.9100270867347717, + "theoretical_loss": 3.482787188170547, + "tokens_seen": 1672806400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004979133226324238, + "loss": 1.4708, + "theoretical_loss": 3.4826576418392, + "tokens_seen": 1673527296 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.012121754698455334, + "objective/train/docs_used": 948408, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0302348136901855, + "objective/train/original_loss": 3.0302345752716064, + "objective/train/theoretical_loss": 3.48249286795901, + "objective/train/tokens_used": 1694904800, + "objective/train/value_avg": -0.035858154296875, + "objective/train/value_loss": 0.00962271261960268, + "objective/train/value_max": -0.0003380775451660156, + "objective/train/value_min": -0.7734375, + "objective/train/value_reward_corr": 0.9216090131237914, + "objective/train/value_std": 0.1041259765625, + "objective/train/weight_avg": 0.9988351464271545, + "objective/train/weighted_lm_loss": 3.028827667236328, + "objective/train/weights_max": 1.0484718084335327, + "objective/train/weights_min": 0.910431981086731, + "theoretical_loss": 3.48249286795901, + "tokens_seen": 1674444800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004975922953451043, + "loss": 1.4387, + "theoretical_loss": 3.4824693382674043, + "tokens_seen": 1674575872 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004972712680577849, + "loss": 1.4498, + "theoretical_loss": 3.4822811855610007, + "tokens_seen": 1675624448 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.0032309736125171185, + "objective/train/docs_used": 949481, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8510870933532715, + "objective/train/original_loss": 2.8510873317718506, + "objective/train/theoretical_loss": 3.4821989161370532, + "objective/train/tokens_used": 1696543200, + "objective/train/value_avg": -0.0129241943359375, + "objective/train/value_loss": 0.0010368363000452518, + "objective/train/value_max": -0.0005154609680175781, + "objective/train/value_min": -0.44580078125, + "objective/train/value_reward_corr": 0.511823461926171, + "objective/train/value_std": 0.0155792236328125, + "objective/train/weight_avg": 1.0003283023834229, + "objective/train/weighted_lm_loss": 2.852421760559082, + "objective/train/weights_max": 1.0149881839752197, + "objective/train/weights_min": 0.95500248670578, + "theoretical_loss": 3.4821989161370532, + "tokens_seen": 1676083200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004969502407704655, + "loss": 1.4651, + "theoretical_loss": 3.4820931835048223, + "tokens_seen": 1676673024 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.0029279510490596294, + "objective/train/docs_used": 950694, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.225564956665039, + "objective/train/original_loss": 3.225564956665039, + "objective/train/theoretical_loss": 3.4819053318841453, + "objective/train/tokens_used": 1698181600, + "objective/train/value_avg": -0.057403564453125, + "objective/train/value_loss": 0.008156288415193558, + "objective/train/value_max": -0.0005297660827636719, + "objective/train/value_min": -0.97021484375, + "objective/train/value_reward_corr": 0.9221986662421587, + "objective/train/value_std": 0.1917724609375, + "objective/train/weight_avg": 0.9997475743293762, + "objective/train/weighted_lm_loss": 3.2259368896484375, + "objective/train/weights_max": 1.0903840065002441, + "objective/train/weights_min": 0.9076246619224548, + "theoretical_loss": 3.4819053318841453, + "tokens_seen": 1677721600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004966292134831462, + "loss": 1.4718, + "theoretical_loss": 3.4819053318841453, + "tokens_seen": 1677721600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004963081861958267, + "loss": 1.4416, + "theoretical_loss": 3.481717630484684, + "tokens_seen": 1678770176 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.007869364693760872, + "objective/train/docs_used": 951339, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7344202995300293, + "objective/train/original_loss": 2.7344202995300293, + "objective/train/theoretical_loss": 3.481612114382382, + "objective/train/tokens_used": 1699820000, + "objective/train/value_avg": -0.01302337646484375, + "objective/train/value_loss": 0.0012891741935163736, + "objective/train/value_max": -0.0003554821014404297, + "objective/train/value_min": -0.401611328125, + "objective/train/value_reward_corr": 0.1591225416598568, + "objective/train/value_std": 0.0164642333984375, + "objective/train/weight_avg": 1.000793218612671, + "objective/train/weighted_lm_loss": 2.735701322555542, + "objective/train/weights_max": 1.020862340927124, + "objective/train/weights_min": 0.9210525751113892, + "theoretical_loss": 3.481612114382382, + "tokens_seen": 1679360000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004959871589085072, + "loss": 1.4677, + "theoretical_loss": 3.481530079092593, + "tokens_seen": 1679818752 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004956661316211878, + "loss": 1.4669, + "theoretical_loss": 3.481342677494464, + "tokens_seen": 1680867328 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.00230806483887136, + "objective/train/docs_used": 952727, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8643884658813477, + "objective/train/original_loss": 2.8643884658813477, + "objective/train/theoretical_loss": 3.481319262816474, + "objective/train/tokens_used": 1701458400, + "objective/train/value_avg": -0.016357421875, + "objective/train/value_loss": 0.005601909011602402, + "objective/train/value_max": -0.00033020973205566406, + "objective/train/value_min": -0.95751953125, + "objective/train/value_reward_corr": 0.34567999240429, + "objective/train/value_std": 0.031890869140625, + "objective/train/weight_avg": 1.000258207321167, + "objective/train/weighted_lm_loss": 2.8632524013519287, + "objective/train/weights_max": 1.0604702234268188, + "objective/train/weights_min": 0.9089450836181641, + "theoretical_loss": 3.481319262816474, + "tokens_seen": 1680998400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004953451043338684, + "loss": 1.4938, + "theoretical_loss": 3.4811554254773243, + "tokens_seen": 1681915904 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.006023095455020666, + "objective/train/docs_used": 953335, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.239837646484375, + "objective/train/original_loss": 3.239837646484375, + "objective/train/theoretical_loss": 3.4810267763737377, + "objective/train/tokens_used": 1703096800, + "objective/train/value_avg": -0.026153564453125, + "objective/train/value_loss": 0.005466919858008623, + "objective/train/value_max": -0.0004973411560058594, + "objective/train/value_min": -0.97216796875, + "objective/train/value_reward_corr": 0.7055831601234022, + "objective/train/value_std": 0.058837890625, + "objective/train/weight_avg": 0.9994247555732727, + "objective/train/weighted_lm_loss": 3.236931800842285, + "objective/train/weights_max": 1.0503756999969482, + "objective/train/weights_min": 0.9419988393783569, + "theoretical_loss": 3.4810267763737377, + "tokens_seen": 1682636800 + }, + { + "epoch": 0.51, + "learning_rate": 0.000495024077046549, + "loss": 1.4502, + "theoretical_loss": 3.4809683228286374, + "tokens_seen": 1682964480 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004947030497592295, + "loss": 1.4519, + "theoretical_loss": 3.4807813693363023, + "tokens_seen": 1684013056 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.0010723185259848833, + "objective/train/docs_used": 954676, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8913180828094482, + "objective/train/original_loss": 2.8913183212280273, + "objective/train/theoretical_loss": 3.4807346542440833, + "objective/train/tokens_used": 1704735200, + "objective/train/value_avg": -0.01678466796875, + "objective/train/value_loss": 0.005359402392059565, + "objective/train/value_max": -0.0005011558532714844, + "objective/train/value_min": -0.91748046875, + "objective/train/value_reward_corr": 0.5720582586806027, + "objective/train/value_std": 0.046783447265625, + "objective/train/weight_avg": 1.0001335144042969, + "objective/train/weighted_lm_loss": 2.8923733234405518, + "objective/train/weights_max": 1.0508155822753906, + "objective/train/weights_min": 0.9097607731819153, + "theoretical_loss": 3.4807346542440833, + "tokens_seen": 1684275200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004943820224719101, + "loss": 1.4638, + "theoretical_loss": 3.480594564788648, + "tokens_seen": 1685061632 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.027678780257701874, + "objective/train/docs_used": 955358, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2343530654907227, + "objective/train/original_loss": 3.2343528270721436, + "objective/train/theoretical_loss": 3.480442895620004, + "objective/train/tokens_used": 1706373600, + "objective/train/value_avg": -0.09588623046875, + "objective/train/value_loss": 0.01705680973827839, + "objective/train/value_max": -0.00020182132720947266, + "objective/train/value_min": -0.96533203125, + "objective/train/value_reward_corr": 0.8273134740354195, + "objective/train/value_std": 0.1927490234375, + "objective/train/weight_avg": 0.997316837310791, + "objective/train/weighted_lm_loss": 3.224318265914917, + "objective/train/weights_max": 1.085200548171997, + "objective/train/weights_min": 0.9181315302848816, + "theoretical_loss": 3.480442895620004, + "tokens_seen": 1685913600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004940609951845907, + "loss": 1.4859, + "theoretical_loss": 3.4804079089744375, + "tokens_seen": 1686110208 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004937399678972713, + "loss": 1.4647, + "theoretical_loss": 3.4802214016828636, + "tokens_seen": 1687158784 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.0032544462010264397, + "objective/train/docs_used": 956671, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2233269214630127, + "objective/train/original_loss": 3.2233269214630127, + "objective/train/theoretical_loss": 3.4801514996965643, + "objective/train/tokens_used": 1708012000, + "objective/train/value_avg": -0.01404571533203125, + "objective/train/value_loss": 0.002659498481079936, + "objective/train/value_max": -0.00031757354736328125, + "objective/train/value_min": -0.89306640625, + "objective/train/value_reward_corr": 0.5356545643482479, + "objective/train/value_std": 0.031494140625, + "objective/train/weight_avg": 1.0003385543823242, + "objective/train/weighted_lm_loss": 3.224220037460327, + "objective/train/weights_max": 1.0498566627502441, + "objective/train/weights_min": 0.9093957543373108, + "theoretical_loss": 3.4801514996965643, + "tokens_seen": 1687552000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004934189406099518, + "loss": 1.4786, + "theoretical_loss": 3.48003504270355, + "tokens_seen": 1688207360 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.004350392147898674, + "objective/train/docs_used": 957370, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.4311819076538086, + "objective/train/original_loss": 2.4311816692352295, + "objective/train/theoretical_loss": 3.4798604656713916, + "objective/train/tokens_used": 1709650400, + "objective/train/value_avg": -0.01422882080078125, + "objective/train/value_loss": 0.002146370243281126, + "objective/train/value_max": -0.0004935264587402344, + "objective/train/value_min": -0.93310546875, + "objective/train/value_reward_corr": 0.33607238783542415, + "objective/train/value_std": 0.0193939208984375, + "objective/train/weight_avg": 1.0004454851150513, + "objective/train/weighted_lm_loss": 2.4320600032806396, + "objective/train/weights_max": 1.0256012678146362, + "objective/train/weights_min": 0.9117777347564697, + "theoretical_loss": 3.4798604656713916, + "tokens_seen": 1689190400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004930979133226324, + "loss": 1.4467, + "theoretical_loss": 3.4798488318265477, + "tokens_seen": 1689255936 + }, + { + "epoch": 0.51, + "learning_rate": 0.000492776886035313, + "loss": 1.4594, + "theoretical_loss": 3.479662768842334, + "tokens_seen": 1690304512 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.0012057207059115171, + "objective/train/docs_used": 957881, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1867973804473877, + "objective/train/original_loss": 3.1867973804473877, + "objective/train/theoretical_loss": 3.4795697927446643, + "objective/train/tokens_used": 1711288800, + "objective/train/value_avg": -0.01418304443359375, + "objective/train/value_loss": 0.006389199756085873, + "objective/train/value_max": -0.0005254745483398438, + "objective/train/value_min": -0.97607421875, + "objective/train/value_reward_corr": 0.5711019560600794, + "objective/train/value_std": 0.0406494140625, + "objective/train/weight_avg": 0.9999106526374817, + "objective/train/weighted_lm_loss": 3.185983896255493, + "objective/train/weights_max": 1.0360890626907349, + "objective/train/weights_min": 0.911036491394043, + "theoretical_loss": 3.4795697927446643, + "tokens_seen": 1690828800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004924558587479935, + "loss": 1.4439, + "theoretical_loss": 3.4794768535418146, + "tokens_seen": 1691353088 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004921348314606742, + "loss": 1.467, + "theoretical_loss": 3.4792910857163193, + "tokens_seen": 1692401664 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.00252800271846354, + "objective/train/docs_used": 959159, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.092756748199463, + "objective/train/original_loss": 3.092756986618042, + "objective/train/theoretical_loss": 3.479279480119101, + "objective/train/tokens_used": 1712927200, + "objective/train/value_avg": -0.0166473388671875, + "objective/train/value_loss": 0.0059075490571558475, + "objective/train/value_max": -0.00034999847412109375, + "objective/train/value_min": -0.7890625, + "objective/train/value_reward_corr": 0.5553052333616528, + "objective/train/value_std": 0.028411865234375, + "objective/train/weight_avg": 0.9997762441635132, + "objective/train/weighted_lm_loss": 3.091820240020752, + "objective/train/weights_max": 1.0819463729858398, + "objective/train/weights_min": 0.910106897354126, + "theoretical_loss": 3.479279480119101, + "tokens_seen": 1692467200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004918138041733548, + "loss": 1.4968, + "theoretical_loss": 3.4791054651576006, + "tokens_seen": 1693450240 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.0011875793570652604, + "objective/train/docs_used": 959847, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.719555139541626, + "objective/train/original_loss": 2.719555616378784, + "objective/train/theoretical_loss": 3.4789895269999507, + "objective/train/tokens_used": 1714565600, + "objective/train/value_avg": -0.0109405517578125, + "objective/train/value_loss": 0.0017533503705635667, + "objective/train/value_max": -0.00036835670471191406, + "objective/train/value_min": -0.90185546875, + "objective/train/value_reward_corr": 0.360302873302638, + "objective/train/value_std": 0.0178375244140625, + "objective/train/weight_avg": 0.9998898506164551, + "objective/train/weighted_lm_loss": 2.7199466228485107, + "objective/train/weights_max": 1.032105565071106, + "objective/train/weights_min": 0.9103373885154724, + "theoretical_loss": 3.4789895269999507, + "tokens_seen": 1694105600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004914927768860354, + "loss": 1.4587, + "theoretical_loss": 3.4789199916578353, + "tokens_seen": 1694498816 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004911717495987159, + "loss": 1.484, + "theoretical_loss": 3.478734665009622, + "tokens_seen": 1695547392 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.006571109872311354, + "objective/train/docs_used": 961177, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0664706230163574, + "objective/train/original_loss": 3.0664706230163574, + "objective/train/theoretical_loss": 3.4786999325949806, + "objective/train/tokens_used": 1716204000, + "objective/train/value_avg": -0.0113372802734375, + "objective/train/value_loss": 0.0010425254004076123, + "objective/train/value_max": -0.0002532005310058594, + "objective/train/value_min": -0.5966796875, + "objective/train/value_reward_corr": 0.44164718667993225, + "objective/train/value_std": 0.0179595947265625, + "objective/train/weight_avg": 1.0006622076034546, + "objective/train/weighted_lm_loss": 3.0684478282928467, + "objective/train/weights_max": 1.0530592203140259, + "objective/train/weights_min": 0.9300150275230408, + "theoretical_loss": 3.4786999325949806, + "tokens_seen": 1695744000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004908507223113965, + "loss": 1.4779, + "theoretical_loss": 3.4785494850059786, + "tokens_seen": 1696595968 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.00029152847127988935, + "objective/train/docs_used": 961828, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0143356323242188, + "objective/train/original_loss": 3.0143356323242188, + "objective/train/theoretical_loss": 3.478410696114469, + "objective/train/tokens_used": 1717842400, + "objective/train/value_avg": -0.0171356201171875, + "objective/train/value_loss": 0.006121884565800428, + "objective/train/value_max": -0.0004093647003173828, + "objective/train/value_min": -0.94970703125, + "objective/train/value_reward_corr": 0.592674362862746, + "objective/train/value_std": 0.045928955078125, + "objective/train/weight_avg": 1.0000591278076172, + "objective/train/weighted_lm_loss": 3.014108896255493, + "objective/train/weights_max": 1.057953119277954, + "objective/train/weights_min": 0.9072171449661255, + "theoretical_loss": 3.478410696114469, + "tokens_seen": 1697382400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004905296950240771, + "loss": 1.4706, + "theoretical_loss": 3.478364451440343, + "tokens_seen": 1697644544 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004902086677367576, + "loss": 1.4868, + "theoretical_loss": 3.478179564106571, + "tokens_seen": 1698693120 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.00031381507869809866, + "objective/train/docs_used": 963095, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.714380979537964, + "objective/train/original_loss": 2.714380979537964, + "objective/train/theoretical_loss": 3.478121816771191, + "objective/train/tokens_used": 1719480800, + "objective/train/value_avg": -0.01227569580078125, + "objective/train/value_loss": 0.0021819057874381542, + "objective/train/value_max": -0.000614166259765625, + "objective/train/value_min": -0.87841796875, + "objective/train/value_reward_corr": 0.2537020827285947, + "objective/train/value_std": 0.0241851806640625, + "objective/train/weight_avg": 0.9999794363975525, + "objective/train/weighted_lm_loss": 2.714099168777466, + "objective/train/weights_max": 1.0604537725448608, + "objective/train/weights_min": 0.9101231098175049, + "theoretical_loss": 3.478121816771191, + "tokens_seen": 1699020800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004898876404494382, + "loss": 1.5032, + "theoretical_loss": 3.4779948227989372, + "tokens_seen": 1699741696 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.0029426771216094494, + "objective/train/docs_used": 963811, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9805972576141357, + "objective/train/original_loss": 2.9805967807769775, + "objective/train/theoretical_loss": 3.477833293780412, + "objective/train/tokens_used": 1721119200, + "objective/train/value_avg": -0.01477813720703125, + "objective/train/value_loss": 0.002911105053499341, + "objective/train/value_max": -0.0005173683166503906, + "objective/train/value_min": -0.451904296875, + "objective/train/value_reward_corr": 0.3568706090455394, + "objective/train/value_std": 0.022613525390625, + "objective/train/weight_avg": 0.9997200965881348, + "objective/train/weighted_lm_loss": 2.980379104614258, + "objective/train/weights_max": 1.0321226119995117, + "objective/train/weights_min": 0.9310606718063354, + "theoretical_loss": 3.477833293780412, + "tokens_seen": 1700659200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004895666131621188, + "loss": 1.4726, + "theoretical_loss": 3.47781022731213, + "tokens_seen": 1700790272 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004892455858747994, + "loss": 1.472, + "theoretical_loss": 3.4776257774412547, + "tokens_seen": 1701838848 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.0007934308378025889, + "objective/train/docs_used": 965136, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.6225104331970215, + "objective/train/original_loss": 2.6225104331970215, + "objective/train/theoretical_loss": 3.477545126359873, + "objective/train/tokens_used": 1722757600, + "objective/train/value_avg": -0.0272064208984375, + "objective/train/value_loss": 0.007469208911061287, + "objective/train/value_max": -0.0004355907440185547, + "objective/train/value_min": -0.49560546875, + "objective/train/value_reward_corr": 0.4758638338985398, + "objective/train/value_std": 0.039825439453125, + "objective/train/weight_avg": 0.9999573230743408, + "objective/train/weighted_lm_loss": 2.620652675628662, + "objective/train/weights_max": 1.0265201330184937, + "objective/train/weights_min": 0.9140478372573853, + "theoretical_loss": 3.477545126359873, + "tokens_seen": 1702297600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004889245585874799, + "loss": 1.4721, + "theoretical_loss": 3.4774414729818295, + "tokens_seen": 1702887424 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.00829350110143423, + "objective/train/docs_used": 965825, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7842636108398438, + "objective/train/original_loss": 2.784263849258423, + "objective/train/theoretical_loss": 3.477257313729786, + "objective/train/tokens_used": 1724396000, + "objective/train/value_avg": -0.038818359375, + "objective/train/value_loss": 0.006241111550480127, + "objective/train/value_max": -0.0002491474151611328, + "objective/train/value_min": -0.94921875, + "objective/train/value_reward_corr": 0.6438658385709968, + "objective/train/value_std": 0.07275390625, + "objective/train/weight_avg": 1.0008602142333984, + "objective/train/weighted_lm_loss": 2.787231206893921, + "objective/train/weights_max": 1.048689365386963, + "objective/train/weights_min": 0.9076323509216309, + "theoretical_loss": 3.477257313729786, + "tokens_seen": 1703936000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004886035313001605, + "loss": 1.482, + "theoretical_loss": 3.477257313729786, + "tokens_seen": 1703936000 + }, + { + "epoch": 0.52, + "learning_rate": 0.000488282504012841, + "loss": 1.4868, + "theoretical_loss": 3.477073299481467, + "tokens_seen": 1704984576 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.007320534437894821, + "objective/train/docs_used": 966739, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5670926570892334, + "objective/train/original_loss": 2.5670926570892334, + "objective/train/theoretical_loss": 3.4769698551128188, + "objective/train/tokens_used": 1726034400, + "objective/train/value_avg": -0.025665283203125, + "objective/train/value_loss": 0.013002671301364899, + "objective/train/value_max": -0.00048041343688964844, + "objective/train/value_min": -0.93896484375, + "objective/train/value_reward_corr": 0.6076154299077352, + "objective/train/value_std": 0.06402587890625, + "objective/train/weight_avg": 0.999331533908844, + "objective/train/weighted_lm_loss": 2.563349962234497, + "objective/train/weights_max": 1.088139533996582, + "objective/train/weights_min": 0.9069458246231079, + "theoretical_loss": 3.4769698551128188, + "tokens_seen": 1705574400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048796147672552166, + "loss": 1.4705, + "theoretical_loss": 3.4768894300336264, + "tokens_seen": 1706033152 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004876404494382022, + "loss": 1.4569, + "theoretical_loss": 3.476705705183427, + "tokens_seen": 1707081728 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.013997827656567097, + "objective/train/docs_used": 967356, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.0847256183624268, + "objective/train/original_loss": 3.0847256183624268, + "objective/train/theoretical_loss": 3.4766827497340875, + "objective/train/tokens_used": 1727672800, + "objective/train/value_avg": -0.0498046875, + "objective/train/value_loss": 0.017563194036483765, + "objective/train/value_max": -0.0004494190216064453, + "objective/train/value_min": -0.95751953125, + "objective/train/value_reward_corr": 0.6315744637309102, + "objective/train/value_std": 0.0950927734375, + "objective/train/weight_avg": 0.9986869096755981, + "objective/train/weighted_lm_loss": 3.0862550735473633, + "objective/train/weights_max": 1.0833147764205933, + "objective/train/weights_min": 0.9123354554176331, + "theoretical_loss": 3.4766827497340875, + "tokens_seen": 1707212800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048731942215088283, + "loss": 1.4453, + "theoretical_loss": 3.4765221247284415, + "tokens_seen": 1708130304 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.00737491762265563, + "objective/train/docs_used": 967795, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.572505474090576, + "objective/train/original_loss": 2.572504997253418, + "objective/train/theoretical_loss": 3.476395996821146, + "objective/train/tokens_used": 1729311200, + "objective/train/value_avg": -0.0173492431640625, + "objective/train/value_loss": 0.0049330550245940685, + "objective/train/value_max": -0.0004839897155761719, + "objective/train/value_min": -0.2454833984375, + "objective/train/value_reward_corr": 0.39458798270312767, + "objective/train/value_std": 0.0208740234375, + "objective/train/weight_avg": 0.999286949634552, + "objective/train/weighted_lm_loss": 2.5718259811401367, + "objective/train/weights_max": 1.0183378458023071, + "objective/train/weights_min": 0.9306320548057556, + "theoretical_loss": 3.476395996821146, + "tokens_seen": 1708851200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004869983948635634, + "loss": 1.4891, + "theoretical_loss": 3.4763386884666483, + "tokens_seen": 1709178880 + }, + { + "epoch": 0.52, + "learning_rate": 0.000486677367576244, + "loss": 1.462, + "theoretical_loss": 3.4761553961964338, + "tokens_seen": 1710227456 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.007176132407039404, + "objective/train/docs_used": 968886, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.6584339141845703, + "objective/train/original_loss": 3.658433437347412, + "objective/train/theoretical_loss": 3.476109595603976, + "objective/train/tokens_used": 1730949600, + "objective/train/value_avg": -0.0125274658203125, + "objective/train/value_loss": 0.0009491578675806522, + "objective/train/value_max": -0.0004878044128417969, + "objective/train/value_min": -0.345703125, + "objective/train/value_reward_corr": 0.34905009451027, + "objective/train/value_std": 0.0175628662109375, + "objective/train/weight_avg": 1.0007222890853882, + "objective/train/weighted_lm_loss": 3.6610946655273438, + "objective/train/weights_max": 1.034816026687622, + "objective/train/weights_min": 0.9168787002563477, + "theoretical_loss": 3.476109595603976, + "tokens_seen": 1710489600 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048635634028892455, + "loss": 1.4824, + "theoretical_loss": 3.475972247716588, + "tokens_seen": 1711276032 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.002592177828773856, + "objective/train/docs_used": 969614, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.184375762939453, + "objective/train/original_loss": 3.184375524520874, + "objective/train/theoretical_loss": 3.4758235453149764, + "objective/train/tokens_used": 1732588000, + "objective/train/value_avg": -0.01221466064453125, + "objective/train/value_loss": 0.0020440074149519205, + "objective/train/value_max": -0.00047850608825683594, + "objective/train/value_min": -0.75927734375, + "objective/train/value_reward_corr": 0.33088068076176913, + "objective/train/value_std": 0.02020263671875, + "objective/train/weight_avg": 1.0002694129943848, + "objective/train/weighted_lm_loss": 3.185084819793701, + "objective/train/weights_max": 1.0341957807540894, + "objective/train/weights_min": 0.9454989433288574, + "theoretical_loss": 3.4758235453149764, + "tokens_seen": 1712128000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048603531300160516, + "loss": 1.4581, + "theoretical_loss": 3.475789242826307, + "tokens_seen": 1712324608 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004857142857142857, + "loss": 1.4692, + "theoretical_loss": 3.4756063813251883, + "tokens_seen": 1713373184 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.0013160620583221316, + "objective/train/docs_used": 971016, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.787954568862915, + "objective/train/original_loss": 2.7879538536071777, + "objective/train/theoretical_loss": 3.475537845188954, + "objective/train/tokens_used": 1734226400, + "objective/train/value_avg": -0.0160675048828125, + "objective/train/value_loss": 0.002894223900511861, + "objective/train/value_max": -0.0004124641418457031, + "objective/train/value_min": -0.84619140625, + "objective/train/value_reward_corr": 0.5923747806266625, + "objective/train/value_std": 0.0345458984375, + "objective/train/weight_avg": 1.0001459121704102, + "objective/train/weighted_lm_loss": 2.7877933979034424, + "objective/train/weights_max": 1.034557580947876, + "objective/train/weights_min": 0.9236950278282166, + "theoretical_loss": 3.475537845188954, + "tokens_seen": 1713766400 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004853932584269663, + "loss": 1.4792, + "theoretical_loss": 3.4754236630132325, + "tokens_seen": 1714421760 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.001593328663147986, + "objective/train/docs_used": 971702, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.1668810844421387, + "objective/train/original_loss": 2.1668806076049805, + "objective/train/theoretical_loss": 3.4752524944631142, + "objective/train/tokens_used": 1735864800, + "objective/train/value_avg": -0.0161895751953125, + "objective/train/value_loss": 0.004054420627653599, + "objective/train/value_max": -0.0005931854248046875, + "objective/train/value_min": -0.2489013671875, + "objective/train/value_reward_corr": 0.09220569247526976, + "objective/train/value_std": 0.0189361572265625, + "objective/train/weight_avg": 0.9998605847358704, + "objective/train/weighted_lm_loss": 2.1659088134765625, + "objective/train/weights_max": 1.0200660228729248, + "objective/train/weights_min": 0.9194103479385376, + "theoretical_loss": 3.4752524944631142, + "tokens_seen": 1715404800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004850722311396469, + "loss": 1.4516, + "theoretical_loss": 3.4752410876908413, + "tokens_seen": 1715470336 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048475120385232744, + "loss": 1.4484, + "theoretical_loss": 3.475058655158816, + "tokens_seen": 1716518912 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.007272266782820225, + "objective/train/docs_used": 972817, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.5942862033843994, + "objective/train/original_loss": 2.5942862033843994, + "objective/train/theoretical_loss": 3.47496749237705, + "objective/train/tokens_used": 1737503200, + "objective/train/value_avg": -0.031463623046875, + "objective/train/value_loss": 0.00488921906799078, + "objective/train/value_max": -0.00043892860412597656, + "objective/train/value_min": -0.63037109375, + "objective/train/value_reward_corr": 0.6617431821868511, + "objective/train/value_std": 0.07159423828125, + "objective/train/weight_avg": 1.0007514953613281, + "objective/train/weighted_lm_loss": 2.596125602722168, + "objective/train/weights_max": 1.049217700958252, + "objective/train/weights_min": 0.9164813756942749, + "theoretical_loss": 3.47496749237705, + "tokens_seen": 1717043200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048443017656500805, + "loss": 1.4726, + "theoretical_loss": 3.474876365218357, + "tokens_seen": 1717567488 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004841091492776886, + "loss": 1.4497, + "theoretical_loss": 3.4746942176710633, + "tokens_seen": 1718616064 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.002110459841787815, + "objective/train/docs_used": 973424, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1721322536468506, + "objective/train/original_loss": 3.1721324920654297, + "objective/train/theoretical_loss": 3.4746828381727335, + "objective/train/tokens_used": 1739141600, + "objective/train/value_avg": -0.01520538330078125, + "objective/train/value_loss": 0.003556867130100727, + "objective/train/value_max": -0.00045299530029296875, + "objective/train/value_min": -0.98974609375, + "objective/train/value_reward_corr": 0.5529929824013944, + "objective/train/value_std": 0.031768798828125, + "objective/train/weight_avg": 1.0002284049987793, + "objective/train/weighted_lm_loss": 3.1722445487976074, + "objective/train/weights_max": 1.0489916801452637, + "objective/train/weights_min": 0.907014012336731, + "theoretical_loss": 3.4746828381727335, + "tokens_seen": 1718681600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004837881219903692, + "loss": 1.4598, + "theoretical_loss": 3.4745122123189294, + "tokens_seen": 1719664640 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.007324881386011839, + "objective/train/docs_used": 974650, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.967808246612549, + "objective/train/original_loss": 2.967808485031128, + "objective/train/theoretical_loss": 3.4743985310945047, + "objective/train/tokens_used": 1740780000, + "objective/train/value_avg": -0.020416259765625, + "objective/train/value_loss": 0.003528261324390769, + "objective/train/value_max": -0.0004124641418457031, + "objective/train/value_min": -0.9423828125, + "objective/train/value_reward_corr": 0.41494888814229636, + "objective/train/value_std": 0.0435791015625, + "objective/train/weight_avg": 1.000749945640564, + "objective/train/weighted_lm_loss": 2.9703612327575684, + "objective/train/weights_max": 1.0701733827590942, + "objective/train/weights_min": 0.9134781956672668, + "theoretical_loss": 3.4743985310945047, + "tokens_seen": 1720320000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004834670947030498, + "loss": 1.4205, + "theoretical_loss": 3.4743303489643473, + "tokens_seen": 1720713216 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004831460674157304, + "loss": 1.4586, + "theoretical_loss": 3.474148627410102, + "tokens_seen": 1721761792 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.0009655365138314664, + "objective/train/docs_used": 975130, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.627814769744873, + "objective/train/original_loss": 2.627814531326294, + "objective/train/theoretical_loss": 3.4741145703890632, + "objective/train/tokens_used": 1742418400, + "objective/train/value_avg": -0.0180816650390625, + "objective/train/value_loss": 0.0063412925228476524, + "objective/train/value_max": -0.0004222393035888672, + "objective/train/value_min": -0.93798828125, + "objective/train/value_reward_corr": 0.5211891367964365, + "objective/train/value_std": 0.037811279296875, + "objective/train/weight_avg": 0.9999345541000366, + "objective/train/weighted_lm_loss": 2.627532482147217, + "objective/train/weights_max": 1.0467264652252197, + "objective/train/weights_min": 0.9081811904907227, + "theoretical_loss": 3.4741145703890632, + "tokens_seen": 1721958400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048282504012841094, + "loss": 1.4353, + "theoretical_loss": 3.4739670474593742, + "tokens_seen": 1722810368 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.0029097222723066807, + "objective/train/docs_used": 976352, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.791840076446533, + "objective/train/original_loss": 2.7918403148651123, + "objective/train/theoretical_loss": 3.473830955305458, + "objective/train/tokens_used": 1744056800, + "objective/train/value_avg": -0.0200347900390625, + "objective/train/value_loss": 0.00581714604049921, + "objective/train/value_max": -0.00037550926208496094, + "objective/train/value_min": -0.80712890625, + "objective/train/value_reward_corr": 0.49381482386127284, + "objective/train/value_std": 0.0372314453125, + "objective/train/weight_avg": 1.0003196001052856, + "objective/train/weighted_lm_loss": 2.792337417602539, + "objective/train/weights_max": 1.0490694046020508, + "objective/train/weights_min": 0.9180986285209656, + "theoretical_loss": 3.473830955305458, + "tokens_seen": 1723596800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004825040128410915, + "loss": 1.4686, + "theoretical_loss": 3.4737856089157355, + "tokens_seen": 1723858944 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004821829855537721, + "loss": 1.4331, + "theoretical_loss": 3.4736043115831507, + "tokens_seen": 1724907520 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.001735880272462964, + "objective/train/docs_used": 977030, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1852447986602783, + "objective/train/original_loss": 3.1852447986602783, + "objective/train/theoretical_loss": 3.473547685095078, + "objective/train/tokens_used": 1745695200, + "objective/train/value_avg": -0.015869140625, + "objective/train/value_loss": 0.002546546282246709, + "objective/train/value_max": -0.0005421638488769531, + "objective/train/value_min": -0.229248046875, + "objective/train/value_reward_corr": 0.3233499125334463, + "objective/train/value_std": 0.018463134765625, + "objective/train/weight_avg": 0.9998390078544617, + "objective/train/weighted_lm_loss": 3.1855361461639404, + "objective/train/weights_max": 1.0200964212417603, + "objective/train/weights_min": 0.931285560131073, + "theoretical_loss": 3.473547685095078, + "tokens_seen": 1725235200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048186195826645267, + "loss": 1.4423, + "theoretical_loss": 3.4734231552659747, + "tokens_seen": 1725956096 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.007105717901140451, + "objective/train/docs_used": 977402, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9742109775543213, + "objective/train/original_loss": 2.9742109775543213, + "objective/train/theoretical_loss": 3.4732647590116423, + "objective/train/tokens_used": 1747333600, + "objective/train/value_avg": -0.018646240234375, + "objective/train/value_loss": 0.003143940819427371, + "objective/train/value_max": -0.0004239082336425781, + "objective/train/value_min": -0.8896484375, + "objective/train/value_reward_corr": 0.4438710702479362, + "objective/train/value_std": 0.04266357421875, + "objective/train/weight_avg": 1.0007262229919434, + "objective/train/weighted_lm_loss": 2.975799798965454, + "objective/train/weights_max": 1.0803766250610352, + "objective/train/weights_min": 0.927905261516571, + "theoretical_loss": 3.4732647590116423, + "tokens_seen": 1726873600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004815409309791333, + "loss": 1.4554, + "theoretical_loss": 3.473242139768953, + "tokens_seen": 1727004672 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004812199036918138, + "loss": 1.4474, + "theoretical_loss": 3.4730612648972174, + "tokens_seen": 1728053248 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.002343316562473774, + "objective/train/docs_used": 978835, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9067015647888184, + "objective/train/original_loss": 2.906700849533081, + "objective/train/theoretical_loss": 3.4729821763111923, + "objective/train/tokens_used": 1748972000, + "objective/train/value_avg": -0.01433563232421875, + "objective/train/value_loss": 0.0036365205887705088, + "objective/train/value_max": -0.0002982616424560547, + "objective/train/value_min": -0.9306640625, + "objective/train/value_reward_corr": 0.5942477838519515, + "objective/train/value_std": 0.03692626953125, + "objective/train/weight_avg": 1.0002521276474, + "objective/train/weighted_lm_loss": 2.9073007106781006, + "objective/train/weights_max": 1.0368986129760742, + "objective/train/weights_min": 0.9072844386100769, + "theoretical_loss": 3.4729821763111923, + "tokens_seen": 1728512000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004808988764044944, + "loss": 1.4467, + "theoretical_loss": 3.4728805304562904, + "tokens_seen": 1729101824 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.005371070466935635, + "objective/train/docs_used": 979527, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.016993761062622, + "objective/train/original_loss": 3.016993999481201, + "objective/train/theoretical_loss": 3.472699936252079, + "objective/train/tokens_used": 1750610400, + "objective/train/value_avg": -0.0123291015625, + "objective/train/value_loss": 0.0011469257296994328, + "objective/train/value_max": -0.0003669261932373047, + "objective/train/value_min": -0.8037109375, + "objective/train/value_reward_corr": 0.5192437745025706, + "objective/train/value_std": 0.0201873779296875, + "objective/train/weight_avg": 1.0005426406860352, + "objective/train/weighted_lm_loss": 3.0187971591949463, + "objective/train/weights_max": 1.0187828540802002, + "objective/train/weights_min": 0.9124981164932251, + "theoretical_loss": 3.472699936252079, + "tokens_seen": 1730150400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048057784911717495, + "loss": 1.4388, + "theoretical_loss": 3.472699936252079, + "tokens_seen": 1730150400 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004802568218298555, + "loss": 1.4598, + "theoretical_loss": 3.4725194820908776, + "tokens_seen": 1731198976 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.004390017595142126, + "objective/train/docs_used": 980288, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1625566482543945, + "objective/train/original_loss": 3.1625566482543945, + "objective/train/theoretical_loss": 3.4724180380949567, + "objective/train/tokens_used": 1752248800, + "objective/train/value_avg": -0.00847625732421875, + "objective/train/value_loss": 0.0008090375340543687, + "objective/train/value_max": -0.0004322528839111328, + "objective/train/value_min": -0.378173828125, + "objective/train/value_reward_corr": 0.2459033173666801, + "objective/train/value_std": 0.01068878173828125, + "objective/train/weight_avg": 1.0004429817199707, + "objective/train/weighted_lm_loss": 3.1638638973236084, + "objective/train/weights_max": 1.0225074291229248, + "objective/train/weights_min": 0.9538688063621521, + "theoretical_loss": 3.4724180380949567, + "tokens_seen": 1731788800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004799357945425361, + "loss": 1.4578, + "theoretical_loss": 3.4723391677793627, + "tokens_seen": 1732247552 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047961476725521667, + "loss": 1.4861, + "theoretical_loss": 3.472158993124598, + "tokens_seen": 1733296128 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": -0.001343807321973145, + "objective/train/docs_used": 981145, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.119926691055298, + "objective/train/original_loss": 3.119926691055298, + "objective/train/theoretical_loss": 3.4721364811027735, + "objective/train/tokens_used": 1753887200, + "objective/train/value_avg": -0.0413818359375, + "objective/train/value_loss": 0.015465700067579746, + "objective/train/value_max": -0.0008392333984375, + "objective/train/value_min": -0.99462890625, + "objective/train/value_reward_corr": 0.5900479245055504, + "objective/train/value_std": 0.09844970703125, + "objective/train/weight_avg": 0.9999415874481201, + "objective/train/weighted_lm_loss": 3.1179487705230713, + "objective/train/weights_max": 1.0798722505569458, + "objective/train/weights_min": 0.9090883135795593, + "theoretical_loss": 3.4721364811027735, + "tokens_seen": 1733427200 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004792937399678973, + "loss": 1.4999, + "theoretical_loss": 3.471978957934027, + "tokens_seen": 1734344704 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.011527429334819317, + "objective/train/docs_used": 982466, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.1917495727539062, + "objective/train/original_loss": 3.1917498111724854, + "objective/train/theoretical_loss": 3.4718552645407583, + "objective/train/tokens_used": 1755525600, + "objective/train/value_avg": -0.0181732177734375, + "objective/train/value_loss": 0.0013451635604724288, + "objective/train/value_max": -0.0004012584686279297, + "objective/train/value_min": -0.7255859375, + "objective/train/value_reward_corr": 0.2130055410845676, + "objective/train/value_std": 0.0230712890625, + "objective/train/weight_avg": 1.0011593103408813, + "objective/train/weighted_lm_loss": 3.195122480392456, + "objective/train/weights_max": 1.0741249322891235, + "objective/train/weights_min": 0.9214996695518494, + "theoretical_loss": 3.4718552645407583, + "tokens_seen": 1735065600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047897271268057784, + "loss": 1.4515, + "theoretical_loss": 3.471799062015476, + "tokens_seen": 1735393280 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047865168539325845, + "loss": 1.4528, + "theoretical_loss": 3.4716193051771524, + "tokens_seen": 1736441856 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.005148132331669331, + "objective/train/docs_used": 983129, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.8825747966766357, + "objective/train/original_loss": 2.8825747966766357, + "objective/train/theoretical_loss": 3.4715743876764176, + "objective/train/tokens_used": 1757164000, + "objective/train/value_avg": -0.0165863037109375, + "objective/train/value_loss": 0.0009730277815833688, + "objective/train/value_max": -0.00031256675720214844, + "objective/train/value_min": -0.80126953125, + "objective/train/value_reward_corr": 0.41165130465673133, + "objective/train/value_std": 0.023834228515625, + "objective/train/weight_avg": 1.0005196332931519, + "objective/train/weighted_lm_loss": 2.883727788925171, + "objective/train/weights_max": 1.0536178350448608, + "objective/train/weights_min": 0.9683568477630615, + "theoretical_loss": 3.4715743876764176, + "tokens_seen": 1736704000 + }, + { + "epoch": 0.53, + "learning_rate": 0.000478330658105939, + "loss": 1.4851, + "theoretical_loss": 3.4714396872276425, + "tokens_seen": 1737490432 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.006801634095609188, + "objective/train/docs_used": 984348, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.2550907135009766, + "objective/train/original_loss": 3.2550904750823975, + "objective/train/theoretical_loss": 3.4712938497795207, + "objective/train/tokens_used": 1758802400, + "objective/train/value_avg": -0.0113067626953125, + "objective/train/value_loss": 0.0004059189814142883, + "objective/train/value_max": -0.00046372413635253906, + "objective/train/value_min": -0.2239990234375, + "objective/train/value_reward_corr": 0.38198116346804484, + "objective/train/value_std": 0.01392364501953125, + "objective/train/weight_avg": 1.0006822347640991, + "objective/train/weighted_lm_loss": 3.2572927474975586, + "objective/train/weights_max": 1.0224242210388184, + "objective/train/weights_min": 0.9675983786582947, + "theoretical_loss": 3.4712938497795207, + "tokens_seen": 1738342400 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004780096308186196, + "loss": 1.4483, + "theoretical_loss": 3.471260207975912, + "tokens_seen": 1738539008 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047768860353130017, + "loss": 1.465, + "theoretical_loss": 3.471080867231304, + "tokens_seen": 1739587584 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.003640508744865656, + "objective/train/docs_used": 985100, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.737335205078125, + "objective/train/original_loss": 2.737335205078125, + "objective/train/theoretical_loss": 3.471013650122095, + "objective/train/tokens_used": 1760440800, + "objective/train/value_avg": -0.021453857421875, + "objective/train/value_loss": 0.0031879947055131197, + "objective/train/value_max": -0.0006189346313476562, + "objective/train/value_min": -0.93017578125, + "objective/train/value_reward_corr": 0.5942503112339217, + "objective/train/value_std": 0.04364013671875, + "objective/train/weight_avg": 1.0003798007965088, + "objective/train/weighted_lm_loss": 2.738243818283081, + "objective/train/weights_max": 1.059588074684143, + "objective/train/weights_min": 0.9121102094650269, + "theoretical_loss": 3.471013650122095, + "tokens_seen": 1739980800 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004773675762439807, + "loss": 1.4488, + "theoretical_loss": 3.470901664803538, + "tokens_seen": 1740636160 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": -0.035582881420850754, + "objective/train/docs_used": 986266, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7278146743774414, + "objective/train/original_loss": 2.7278144359588623, + "objective/train/theoretical_loss": 3.4707337879784133, + "objective/train/tokens_used": 1762079200, + "objective/train/value_avg": -0.031646728515625, + "objective/train/value_loss": 0.029011575505137444, + "objective/train/value_max": -0.0003726482391357422, + "objective/train/value_min": -0.91845703125, + "objective/train/value_reward_corr": 0.5768897990050087, + "objective/train/value_std": 0.052215576171875, + "objective/train/weight_avg": 0.9965840578079224, + "objective/train/weighted_lm_loss": 2.7177963256835938, + "objective/train/weights_max": 1.053080677986145, + "objective/train/weights_min": 0.9309263229370117, + "theoretical_loss": 3.4707337879784133, + "tokens_seen": 1741619200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047704654895666134, + "loss": 1.4442, + "theoretical_loss": 3.470722600502711, + "tokens_seen": 1741684736 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004767255216693419, + "loss": 1.4568, + "theoretical_loss": 3.470543674139293, + "tokens_seen": 1742733312 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.005413671024143696, + "objective/train/docs_used": 986906, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7958436012268066, + "objective/train/original_loss": 2.7958438396453857, + "objective/train/theoretical_loss": 3.470454262624987, + "objective/train/tokens_used": 1763717600, + "objective/train/value_avg": -0.01528167724609375, + "objective/train/value_loss": 0.002177622402086854, + "objective/train/value_max": -0.0005054473876953125, + "objective/train/value_min": -0.387939453125, + "objective/train/value_reward_corr": 0.3571022779705682, + "objective/train/value_std": 0.034515380859375, + "objective/train/weight_avg": 1.0005520582199097, + "objective/train/weighted_lm_loss": 2.7969374656677246, + "objective/train/weights_max": 1.0329557657241821, + "objective/train/weights_min": 0.9209885001182556, + "theoretical_loss": 3.470454262624987, + "tokens_seen": 1743257600 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004764044943820225, + "loss": 1.4543, + "theoretical_loss": 3.4703648855241283, + "tokens_seen": 1743781888 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047608346709470306, + "loss": 1.439, + "theoretical_loss": 3.470186234468435, + "tokens_seen": 1744830464 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.0019685360603034496, + "objective/train/docs_used": 987618, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9564805030822754, + "objective/train/original_loss": 2.9564805030822754, + "objective/train/theoretical_loss": 3.4701750733405556, + "objective/train/tokens_used": 1765356000, + "objective/train/value_avg": -0.0137939453125, + "objective/train/value_loss": 0.001132114906795323, + "objective/train/value_max": -0.0002613067626953125, + "objective/train/value_min": -0.2340087890625, + "objective/train/value_reward_corr": 0.5337671112936905, + "objective/train/value_std": 0.017547607421875, + "objective/train/weight_avg": 1.0002025365829468, + "objective/train/weighted_lm_loss": 2.957582712173462, + "objective/train/weights_max": 1.0211681127548218, + "objective/train/weights_min": 0.9391404390335083, + "theoretical_loss": 3.4701750733405556, + "tokens_seen": 1744896000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047576243980738367, + "loss": 1.4289, + "theoretical_loss": 3.4700077207838023, + "tokens_seen": 1745879040 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.003577066119760275, + "objective/train/docs_used": 989102, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 3.044513702392578, + "objective/train/original_loss": 3.0445139408111572, + "objective/train/theoretical_loss": 3.469896219406081, + "objective/train/tokens_used": 1766994400, + "objective/train/value_avg": -0.0153656005859375, + "objective/train/value_loss": 0.0025458920281380415, + "objective/train/value_max": -0.0003654956817626953, + "objective/train/value_min": -0.69189453125, + "objective/train/value_reward_corr": 0.4253957665689127, + "objective/train/value_std": 0.032196044921875, + "objective/train/weight_avg": 1.0003703832626343, + "objective/train/weighted_lm_loss": 3.046025276184082, + "objective/train/weights_max": 1.0587223768234253, + "objective/train/weights_min": 0.9249367713928223, + "theoretical_loss": 3.469896219406081, + "tokens_seen": 1746534400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047544141252006423, + "loss": 1.4324, + "theoretical_loss": 3.4698293442821915, + "tokens_seen": 1746927616 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047512038523274484, + "loss": 1.4419, + "theoretical_loss": 3.4696511047759317, + "tokens_seen": 1747976192 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.006714529357850552, + "objective/train/docs_used": 989762, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.9737558364868164, + "objective/train/original_loss": 2.9737560749053955, + "objective/train/theoretical_loss": 3.4696177001047337, + "objective/train/tokens_used": 1768632800, + "objective/train/value_avg": -0.016204833984375, + "objective/train/value_loss": 0.0007580204983241856, + "objective/train/value_max": -0.00046372413635253906, + "objective/train/value_min": -0.39013671875, + "objective/train/value_reward_corr": 0.46685199356252166, + "objective/train/value_std": 0.0215606689453125, + "objective/train/weight_avg": 1.0006752014160156, + "objective/train/weighted_lm_loss": 2.9752283096313477, + "objective/train/weights_max": 1.0212786197662354, + "objective/train/weights_min": 0.9738178849220276, + "theoretical_loss": 3.4696177001047337, + "tokens_seen": 1748172800 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004747993579454254, + "loss": 1.4372, + "theoretical_loss": 3.4694730020777245, + "tokens_seen": 1749024768 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": -0.012495055794715881, + "objective/train/docs_used": 990903, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.926884889602661, + "objective/train/original_loss": 2.9268851280212402, + "objective/train/theoretical_loss": 3.4693395147218875, + "objective/train/tokens_used": 1770271200, + "objective/train/value_avg": -0.02215576171875, + "objective/train/value_loss": 0.0057158940471708775, + "objective/train/value_max": -0.00031495094299316406, + "objective/train/value_min": -0.67578125, + "objective/train/value_reward_corr": 0.5168014231793994, + "objective/train/value_std": 0.033599853515625, + "objective/train/weight_avg": 0.9987788200378418, + "objective/train/weighted_lm_loss": 2.923095941543579, + "objective/train/weights_max": 1.0447301864624023, + "objective/train/weights_min": 0.9232602715492249, + "theoretical_loss": 3.4693395147218875, + "tokens_seen": 1749811200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047447833065810595, + "loss": 1.4324, + "theoretical_loss": 3.4692950360006365, + "tokens_seen": 1750073344 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047415730337078656, + "loss": 1.4605, + "theoretical_loss": 3.469117206358103, + "tokens_seen": 1751121920 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.002563327318057418, + "objective/train/docs_used": 991641, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7670137882232666, + "objective/train/original_loss": 2.7670130729675293, + "objective/train/theoretical_loss": 3.4690616625451094, + "objective/train/tokens_used": 1771909600, + "objective/train/value_avg": -0.0131988525390625, + "objective/train/value_loss": 0.002694714581593871, + "objective/train/value_max": -0.0002982616424560547, + "objective/train/value_min": -0.72265625, + "objective/train/value_reward_corr": 0.4934608962356861, + "objective/train/value_std": 0.0196533203125, + "objective/train/weight_avg": 1.0002695322036743, + "objective/train/weighted_lm_loss": 2.7677690982818604, + "objective/train/weights_max": 1.0309031009674072, + "objective/train/weights_min": 0.9125615954399109, + "theoretical_loss": 3.4690616625451094, + "tokens_seen": 1751449600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047383627608346706, + "loss": 1.425, + "theoretical_loss": 3.4689395129639253, + "tokens_seen": 1752170496 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": -0.002976392861455679, + "objective/train/docs_used": 992944, + "objective/train/instantaneous_batch_size": 16, + "objective/train/instantaneous_microbatch_size": 16384, + "objective/train/lm_loss": 2.7343637943267822, + "objective/train/original_loss": 2.734363555908203, + "objective/train/theoretical_loss": 3.4687841428641515, + "objective/train/tokens_used": 1773548000, + "objective/train/value_avg": -0.01247406005859375, + "objective/train/value_loss": 0.002832220634445548, + "objective/train/value_max": -0.0004601478576660156, + "objective/train/value_min": -0.419189453125, + "objective/train/value_reward_corr": 0.32546630776569146, + "objective/train/value_std": 0.019073486328125, + "objective/train/weight_avg": 0.9997163414955139, + "objective/train/weighted_lm_loss": 2.734360694885254, + "objective/train/weights_max": 1.041724681854248, + "objective/train/weights_min": 0.9351781606674194, + "theoretical_loss": 3.4687841428641515, + "tokens_seen": 1753088000 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004735152487961477, + "loss": 1.4437, + "theoretical_loss": 3.468761955632271, + "tokens_seen": 1753219072 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047319422150882823, + "loss": 1.471, + "theoretical_loss": 3.4685845341776704, + "tokens_seen": 1754267648 + } + ], + "max_steps": 3147, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.9527401807505e+17, + "trial_name": null, + "trial_params": null +}