{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 12588, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.936507936507936e-06, "loss": 1.0932, "theoretical_loss": 14.920781838632275, "tokens_seen": 262144 }, { "epoch": 0.0, "learning_rate": 1.5873015873015872e-05, "loss": 1.0937, "theoretical_loss": 12.718594708127029, "tokens_seen": 524288 }, { "epoch": 0.0, "learning_rate": 2.380952380952381e-05, "loss": 1.0519, "theoretical_loss": 11.615184291350435, "tokens_seen": 786432 }, { "epoch": 0.0, "learning_rate": 3.1746031746031745e-05, "loss": 1.0059, "theoretical_loss": 10.904893169100655, "tokens_seen": 1048576 }, { "epoch": 0.0, "learning_rate": 3.968253968253968e-05, "loss": 0.9761, "theoretical_loss": 10.392029026407034, "tokens_seen": 1310720 }, { "epoch": 0.0, "learning_rate": 4.761904761904762e-05, "loss": 0.9521, "theoretical_loss": 9.996134261483984, "tokens_seen": 1572864 }, { "epoch": 0.0, "learning_rate": 5.555555555555555e-05, "loss": 0.9364, "theoretical_loss": 9.67682184172525, "tokens_seen": 1835008 }, { "epoch": 0.0, "learning_rate": 6.349206349206349e-05, "loss": 0.9245, "theoretical_loss": 9.41114487355416, "tokens_seen": 2097152 }, { "epoch": 0.0, "learning_rate": 7.142857142857142e-05, "loss": 0.9103, "theoretical_loss": 9.184905895151996, "tokens_seen": 2359296 }, { "epoch": 0.0, "learning_rate": 7.936507936507937e-05, "loss": 0.8933, "theoretical_loss": 8.988754572553061, "tokens_seen": 2621440 }, { "epoch": 0.0, "learning_rate": 8.73015873015873e-05, "loss": 0.8721, "theoretical_loss": 8.816230875422118, "tokens_seen": 2883584 }, { "epoch": 0.0, "learning_rate": 9.523809523809524e-05, "loss": 0.8552, "theoretical_loss": 8.66269920037918, "tokens_seen": 3145728 }, { "epoch": 0.0, "learning_rate": 0.00010317460317460317, "loss": 0.8283, "theoretical_loss": 8.524729102289708, "tokens_seen": 3407872 }, { "epoch": 0.0, "learning_rate": 0.0001111111111111111, "loss": 0.8016, "theoretical_loss": 8.399716359763914, "tokens_seen": 3670016 }, { "epoch": 0.0, "learning_rate": 0.00011904761904761905, "loss": 0.7709, "theoretical_loss": 8.285641004895568, "tokens_seen": 3932160 }, { "epoch": 0.0, "learning_rate": 0.00012698412698412698, "loss": 0.7402, "theoretical_loss": 8.180907195283321, "tokens_seen": 4194304 }, { "epoch": 0.0, "learning_rate": 0.0001349206349206349, "loss": 0.7102, "theoretical_loss": 8.084233979345122, "tokens_seen": 4456448 }, { "epoch": 0.0, "learning_rate": 0.00014285714285714284, "loss": 0.6751, "theoretical_loss": 7.9945788049155055, "tokens_seen": 4718592 }, { "epoch": 0.0, "learning_rate": 0.0001507936507936508, "loss": 0.6503, "theoretical_loss": 7.911082722632908, "tokens_seen": 4980736 }, { "epoch": 0.0, "learning_rate": 0.00015873015873015873, "loss": 0.6184, "theoretical_loss": 7.83303033759787, "tokens_seen": 5242880 }, { "epoch": 0.0, "learning_rate": 0.00016666666666666666, "loss": 0.5919, "theoretical_loss": 7.759820016443023, "tokens_seen": 5505024 }, { "epoch": 0.0, "learning_rate": 0.0001746031746031746, "loss": 0.5615, "theoretical_loss": 7.690941370375033, "tokens_seen": 5767168 }, { "epoch": 0.0, "learning_rate": 0.00018253968253968252, "loss": 0.5314, "theoretical_loss": 7.6259579939239845, "tokens_seen": 6029312 }, { "epoch": 0.0, "learning_rate": 0.00019047619047619048, "loss": 0.5049, "theoretical_loss": 7.564494061943624, "tokens_seen": 6291456 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.19619600474834442, "objective/train/docs_used": 9704, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 7.333767890930176, "objective/train/original_loss": 7.333766937255859, "objective/train/theoretical_loss": 7.5062238006917354, "objective/train/tokens_used": 27013600, "objective/train/value_avg": -0.204345703125, "objective/train/value_loss": 0.03900672122836113, "objective/train/value_max": -0.19873046875, "objective/train/value_min": -0.251953125, "objective/train/value_reward_corr": -0.012004978426084556, "objective/train/value_std": 0.01258087158203125, "objective/train/weight_avg": 1.2170684337615967, "objective/train/weighted_lm_loss": 8.908797264099121, "objective/train/weights_max": 1.2865357398986816, "objective/train/weights_min": 0.4672624170780182, "theoretical_loss": 7.5062238006917354, "tokens_seen": 6553600 }, { "epoch": 0.0, "learning_rate": 0.0001984126984126984, "loss": 0.4798, "theoretical_loss": 7.5062238006917354, "tokens_seen": 6553600 }, { "epoch": 0.0, "learning_rate": 0.00020634920634920634, "loss": 0.4565, "theoretical_loss": 7.45086312850561, "tokens_seen": 6815744 }, { "epoch": 0.0, "learning_rate": 0.00021428571428571427, "loss": 0.4398, "theoretical_loss": 7.398162954262078, "tokens_seen": 7077888 }, { "epoch": 0.0, "learning_rate": 0.0002222222222222222, "loss": 0.4258, "theoretical_loss": 7.347903756717382, "tokens_seen": 7340032 }, { "epoch": 0.0, "learning_rate": 0.00023015873015873016, "loss": 0.4009, "theoretical_loss": 7.299891163694537, "tokens_seen": 7602176 }, { "epoch": 0.0, "learning_rate": 0.0002380952380952381, "loss": 0.391, "theoretical_loss": 7.253952319156202, "tokens_seen": 7864320 }, { "epoch": 0.0, "learning_rate": 0.000246031746031746, "loss": 0.3753, "theoretical_loss": 7.2099328765932205, "tokens_seen": 8126464 }, { "epoch": 0.0, "learning_rate": 0.00025396825396825396, "loss": 0.366, "theoretical_loss": 7.167694494355343, "tokens_seen": 8388608 }, { "epoch": 0.0, "learning_rate": 0.0002619047619047619, "loss": 0.3573, "theoretical_loss": 7.127112736305475, "tokens_seen": 8650752 }, { "epoch": 0.0, "learning_rate": 0.0002698412698412698, "loss": 0.3476, "theoretical_loss": 7.0880753020982725, "tokens_seen": 8912896 }, { "epoch": 0.0, "learning_rate": 0.0002777777777777778, "loss": 0.3461, "theoretical_loss": 7.050480527300383, "tokens_seen": 9175040 }, { "epoch": 0.0, "learning_rate": 0.0002857142857142857, "loss": 0.3395, "theoretical_loss": 7.014236105786485, "tokens_seen": 9437184 }, { "epoch": 0.0, "learning_rate": 0.0002936507936507937, "loss": 0.3386, "theoretical_loss": 6.979257996300014, "tokens_seen": 9699328 }, { "epoch": 0.0, "learning_rate": 0.0003015873015873016, "loss": 0.3357, "theoretical_loss": 6.945469482441503, "tokens_seen": 9961472 }, { "epoch": 0.0, "learning_rate": 0.00030952380952380956, "loss": 0.3318, "theoretical_loss": 6.912800361140576, "tokens_seen": 10223616 }, { "epoch": 0.0, "learning_rate": 0.00031746031746031746, "loss": 0.33, "theoretical_loss": 6.881186239250335, "tokens_seen": 10485760 }, { "epoch": 0.0, "learning_rate": 0.0003253968253968254, "loss": 0.3287, "theoretical_loss": 6.8505679215514235, "tokens_seen": 10747904 }, { "epoch": 0.0, "learning_rate": 0.0003333333333333333, "loss": 0.3244, "theoretical_loss": 6.8208908763759295, "tokens_seen": 11010048 }, { "epoch": 0.0, "learning_rate": 0.0003412698412698413, "loss": 0.3241, "theoretical_loss": 6.79210476741633, "tokens_seen": 11272192 }, { "epoch": 0.0, "learning_rate": 0.0003492063492063492, "loss": 0.3206, "theoretical_loss": 6.76416304219278, "tokens_seen": 11534336 }, { "epoch": 0.0, "learning_rate": 0.00035714285714285714, "loss": 0.3173, "theoretical_loss": 6.737022569206117, "tokens_seen": 11796480 }, { "epoch": 0.0, "learning_rate": 0.00036507936507936505, "loss": 0.3156, "theoretical_loss": 6.710643317075979, "tokens_seen": 12058624 }, { "epoch": 0.0, "learning_rate": 0.000373015873015873, "loss": 0.3142, "theoretical_loss": 6.684988070009584, "tokens_seen": 12320768 }, { "epoch": 0.0, "learning_rate": 0.00038095238095238096, "loss": 0.3193, "theoretical_loss": 6.660022174811009, "tokens_seen": 12582912 }, { "epoch": 0.0, "learning_rate": 0.0003888888888888889, "loss": 0.3096, "theoretical_loss": 6.6357133153579175, "tokens_seen": 12845056 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.028042539954185486, "objective/train/docs_used": 12170, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 6.02435827255249, "objective/train/original_loss": 6.02435827255249, "objective/train/theoretical_loss": 6.612031311070119, "objective/train/tokens_used": 33567200, "objective/train/value_avg": -0.034576416015625, "objective/train/value_loss": 0.0012018069392070174, "objective/train/value_max": -0.01337432861328125, "objective/train/value_min": -0.052825927734375, "objective/train/value_reward_corr": -0.01474424872782136, "objective/train/value_std": 0.005672454833984375, "objective/train/weight_avg": 1.0286412239074707, "objective/train/weighted_lm_loss": 6.1940083503723145, "objective/train/weights_max": 1.054246187210083, "objective/train/weights_min": 0.3763794004917145, "theoretical_loss": 6.612031311070119, "tokens_seen": 13107200 }, { "epoch": 0.0, "learning_rate": 0.0003968253968253968, "loss": 0.309, "theoretical_loss": 6.612031311070119, "tokens_seen": 13107200 }, { "epoch": 0.0, "learning_rate": 0.0004047619047619048, "loss": 0.3093, "theoretical_loss": 6.588947936394168, "tokens_seen": 13369344 }, { "epoch": 0.0, "learning_rate": 0.0004126984126984127, "loss": 0.303, "theoretical_loss": 6.566436758747731, "tokens_seen": 13631488 }, { "epoch": 0.0, "learning_rate": 0.00042063492063492065, "loss": 0.3026, "theoretical_loss": 6.544472992721121, "tokens_seen": 13893632 }, { "epoch": 0.0, "learning_rate": 0.00042857142857142855, "loss": 0.3027, "theoretical_loss": 6.523033368632323, "tokens_seen": 14155776 }, { "epoch": 0.0, "learning_rate": 0.0004365079365079365, "loss": 0.3037, "theoretical_loss": 6.502096013785574, "tokens_seen": 14417920 }, { "epoch": 0.0, "learning_rate": 0.0004444444444444444, "loss": 0.2987, "theoretical_loss": 6.481640344999435, "tokens_seen": 14680064 }, { "epoch": 0.0, "learning_rate": 0.00045238095238095237, "loss": 0.3013, "theoretical_loss": 6.461646971154669, "tokens_seen": 14942208 }, { "epoch": 0.0, "learning_rate": 0.00046031746031746033, "loss": 0.3009, "theoretical_loss": 6.442097604670096, "tokens_seen": 15204352 }, { "epoch": 0.0, "learning_rate": 0.0004682539682539683, "loss": 0.291, "theoretical_loss": 6.422974980950157, "tokens_seen": 15466496 }, { "epoch": 0.0, "learning_rate": 0.0004761904761904762, "loss": 0.295, "theoretical_loss": 6.404262784964672, "tokens_seen": 15728640 }, { "epoch": 0.0, "learning_rate": 0.00048412698412698415, "loss": 0.2957, "theoretical_loss": 6.3859455842220765, "tokens_seen": 15990784 }, { "epoch": 0.0, "learning_rate": 0.000492063492063492, "loss": 0.2955, "theoretical_loss": 6.368008767484675, "tokens_seen": 16252928 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 0.2931, "theoretical_loss": 6.350438488650175, "tokens_seen": 16515072 }, { "epoch": 0.01, "learning_rate": 0.0005079365079365079, "loss": 0.2938, "theoretical_loss": 6.333221615289645, "tokens_seen": 16777216 }, { "epoch": 0.01, "learning_rate": 0.0005158730158730159, "loss": 0.2898, "theoretical_loss": 6.316345681389436, "tokens_seen": 17039360 }, { "epoch": 0.01, "learning_rate": 0.0005238095238095238, "loss": 0.2847, "theoretical_loss": 6.2997988438948465, "tokens_seen": 17301504 }, { "epoch": 0.01, "learning_rate": 0.0005317460317460317, "loss": 0.2833, "theoretical_loss": 6.283569842697203, "tokens_seen": 17563648 }, { "epoch": 0.01, "learning_rate": 0.0005396825396825396, "loss": 0.2873, "theoretical_loss": 6.26764796374462, "tokens_seen": 17825792 }, { "epoch": 0.01, "learning_rate": 0.0005476190476190477, "loss": 0.2833, "theoretical_loss": 6.25202300499066, "tokens_seen": 18087936 }, { "epoch": 0.01, "learning_rate": 0.0005555555555555556, "loss": 0.284, "theoretical_loss": 6.236685244924882, "tokens_seen": 18350080 }, { "epoch": 0.01, "learning_rate": 0.0005634920634920635, "loss": 0.2831, "theoretical_loss": 6.2216254134558024, "tokens_seen": 18612224 }, { "epoch": 0.01, "learning_rate": 0.0005714285714285714, "loss": 0.2838, "theoretical_loss": 6.206834664939976, "tokens_seen": 18874368 }, { "epoch": 0.01, "learning_rate": 0.0005793650793650794, "loss": 0.2854, "theoretical_loss": 6.192304553171669, "tokens_seen": 19136512 }, { "epoch": 0.01, "learning_rate": 0.0005873015873015874, "loss": 0.2811, "theoretical_loss": 6.178027008165916, "tokens_seen": 19398656 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.023865671828389168, "objective/train/docs_used": 14656, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 5.437821865081787, "objective/train/original_loss": 5.437821388244629, "objective/train/theoretical_loss": 6.163994314584031, "objective/train/tokens_used": 40120800, "objective/train/value_avg": -0.032073974609375, "objective/train/value_loss": 0.0011683752527460456, "objective/train/value_max": -0.00933837890625, "objective/train/value_min": -0.06573486328125, "objective/train/value_reward_corr": 0.06998308804235678, "objective/train/value_std": 0.01036834716796875, "objective/train/weight_avg": 1.0244362354278564, "objective/train/weighted_lm_loss": 5.583455562591553, "objective/train/weights_max": 1.0648194551467896, "objective/train/weights_min": 0.37916356325149536, "theoretical_loss": 6.163994314584031, "tokens_seen": 19660800 }, { "epoch": 0.01, "learning_rate": 0.0005952380952380953, "loss": 0.282, "theoretical_loss": 6.163994314584031, "tokens_seen": 19660800 }, { "epoch": 0.01, "learning_rate": 0.0006031746031746032, "loss": 0.2786, "theoretical_loss": 6.150199091665225, "tokens_seen": 19922944 }, { "epoch": 0.01, "learning_rate": 0.0006111111111111112, "loss": 0.2766, "theoretical_loss": 6.136634274540901, "tokens_seen": 20185088 }, { "epoch": 0.01, "learning_rate": 0.0006190476190476191, "loss": 0.279, "theoretical_loss": 6.123293096819758, "tokens_seen": 20447232 }, { "epoch": 0.01, "learning_rate": 0.000626984126984127, "loss": 0.2761, "theoretical_loss": 6.1101690743422505, "tokens_seen": 20709376 }, { "epoch": 0.01, "learning_rate": 0.0006349206349206349, "loss": 0.2751, "theoretical_loss": 6.097255990012153, "tokens_seen": 20971520 }, { "epoch": 0.01, "learning_rate": 0.0006428571428571429, "loss": 0.2728, "theoretical_loss": 6.084547879621354, "tokens_seen": 21233664 }, { "epoch": 0.01, "learning_rate": 0.0006507936507936508, "loss": 0.2741, "theoretical_loss": 6.072039018591484, "tokens_seen": 21495808 }, { "epoch": 0.01, "learning_rate": 0.0006587301587301587, "loss": 0.2744, "theoretical_loss": 6.059723909562683, "tokens_seen": 21757952 }, { "epoch": 0.01, "learning_rate": 0.0006666666666666666, "loss": 0.2699, "theoretical_loss": 6.047597270765904, "tokens_seen": 22020096 }, { "epoch": 0.01, "learning_rate": 0.0006746031746031747, "loss": 0.271, "theoretical_loss": 6.035654025120612, "tokens_seen": 22282240 }, { "epoch": 0.01, "learning_rate": 0.0006825396825396826, "loss": 0.2683, "theoretical_loss": 6.023889290004692, "tokens_seen": 22544384 }, { "epoch": 0.01, "learning_rate": 0.0006904761904761905, "loss": 0.2678, "theoretical_loss": 6.012298367647816, "tokens_seen": 22806528 }, { "epoch": 0.01, "learning_rate": 0.0006984126984126984, "loss": 0.2651, "theoretical_loss": 6.000876736103618, "tokens_seen": 23068672 }, { "epoch": 0.01, "learning_rate": 0.0007063492063492064, "loss": 0.2683, "theoretical_loss": 5.989620040759641, "tokens_seen": 23330816 }, { "epoch": 0.01, "learning_rate": 0.0007142857142857143, "loss": 0.2624, "theoretical_loss": 5.978524086347409, "tokens_seen": 23592960 }, { "epoch": 0.01, "learning_rate": 0.0007222222222222222, "loss": 0.2659, "theoretical_loss": 5.967584829417934, "tokens_seen": 23855104 }, { "epoch": 0.01, "learning_rate": 0.0007301587301587301, "loss": 0.2616, "theoretical_loss": 5.956798371250791, "tokens_seen": 24117248 }, { "epoch": 0.01, "learning_rate": 0.0007380952380952381, "loss": 0.2583, "theoretical_loss": 5.9461609511673625, "tokens_seen": 24379392 }, { "epoch": 0.01, "learning_rate": 0.000746031746031746, "loss": 0.2625, "theoretical_loss": 5.935668940221127, "tokens_seen": 24641536 }, { "epoch": 0.01, "learning_rate": 0.000753968253968254, "loss": 0.261, "theoretical_loss": 5.92531883523999, "tokens_seen": 24903680 }, { "epoch": 0.01, "learning_rate": 0.0007619047619047619, "loss": 0.26, "theoretical_loss": 5.915107253197538, "tokens_seen": 25165824 }, { "epoch": 0.01, "learning_rate": 0.0007698412698412699, "loss": 0.2623, "theoretical_loss": 5.905030925891829, "tokens_seen": 25427968 }, { "epoch": 0.01, "learning_rate": 0.0007777777777777778, "loss": 0.2632, "theoretical_loss": 5.895086694911951, "tokens_seen": 25690112 }, { "epoch": 0.01, "learning_rate": 0.0007857142857142857, "loss": 0.2608, "theoretical_loss": 5.88527150687402, "tokens_seen": 25952256 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.016880027949810028, "objective/train/docs_used": 17033, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 5.177478313446045, "objective/train/original_loss": 5.177477836608887, "objective/train/theoretical_loss": 5.8755824089096285, "objective/train/tokens_used": 46674400, "objective/train/value_avg": -0.0229949951171875, "objective/train/value_loss": 0.000880128238350153, "objective/train/value_max": -0.00821685791015625, "objective/train/value_min": -0.06536865234375, "objective/train/value_reward_corr": 0.07602733877190453, "objective/train/value_std": 0.00830841064453125, "objective/train/weight_avg": 1.017284870147705, "objective/train/weighted_lm_loss": 5.27687406539917, "objective/train/weights_max": 1.0675524473190308, "objective/train/weights_min": 0.3755991756916046, "theoretical_loss": 5.8755824089096285, "tokens_seen": 26214400 }, { "epoch": 0.01, "learning_rate": 0.0007936507936507937, "loss": 0.2611, "theoretical_loss": 5.8755824089096285, "tokens_seen": 26214400 }, { "epoch": 0.01, "learning_rate": 0.0008015873015873017, "loss": 0.2562, "theoretical_loss": 5.866016544391016, "tokens_seen": 26476544 }, { "epoch": 0.01, "learning_rate": 0.0008095238095238096, "loss": 0.253, "theoretical_loss": 5.856571148878293, "tokens_seen": 26738688 }, { "epoch": 0.01, "learning_rate": 0.0008174603174603175, "loss": 0.2532, "theoretical_loss": 5.847243546275179, "tokens_seen": 27000832 }, { "epoch": 0.01, "learning_rate": 0.0008253968253968254, "loss": 0.2524, "theoretical_loss": 5.838031145180573, "tokens_seen": 27262976 }, { "epoch": 0.01, "learning_rate": 0.0008333333333333334, "loss": 0.257, "theoretical_loss": 5.82893143542425, "tokens_seen": 27525120 }, { "epoch": 0.01, "learning_rate": 0.0008412698412698413, "loss": 0.253, "theoretical_loss": 5.81994198477569, "tokens_seen": 27787264 }, { "epoch": 0.01, "learning_rate": 0.0008492063492063492, "loss": 0.2507, "theoretical_loss": 5.811060435815881, "tokens_seen": 28049408 }, { "epoch": 0.01, "learning_rate": 0.0008571428571428571, "loss": 0.2532, "theoretical_loss": 5.802284502962563, "tokens_seen": 28311552 }, { "epoch": 0.01, "learning_rate": 0.0008650793650793651, "loss": 0.2512, "theoretical_loss": 5.793611969640068, "tokens_seen": 28573696 }, { "epoch": 0.01, "learning_rate": 0.000873015873015873, "loss": 0.2509, "theoretical_loss": 5.785040685585437, "tokens_seen": 28835840 }, { "epoch": 0.01, "learning_rate": 0.0008809523809523809, "loss": 0.2497, "theoretical_loss": 5.7765685642831155, "tokens_seen": 29097984 }, { "epoch": 0.01, "learning_rate": 0.0008888888888888888, "loss": 0.2538, "theoretical_loss": 5.768193580520972, "tokens_seen": 29360128 }, { "epoch": 0.01, "learning_rate": 0.0008968253968253968, "loss": 0.2495, "theoretical_loss": 5.759913768060882, "tokens_seen": 29622272 }, { "epoch": 0.01, "learning_rate": 0.0009047619047619047, "loss": 0.2494, "theoretical_loss": 5.7517272174175496, "tokens_seen": 29884416 }, { "epoch": 0.01, "learning_rate": 0.0009126984126984126, "loss": 0.2498, "theoretical_loss": 5.743632073739626, "tokens_seen": 30146560 }, { "epoch": 0.01, "learning_rate": 0.0009206349206349207, "loss": 0.2478, "theoretical_loss": 5.735626534787584, "tokens_seen": 30408704 }, { "epoch": 0.01, "learning_rate": 0.0009285714285714287, "loss": 0.2429, "theoretical_loss": 5.727708849003127, "tokens_seen": 30670848 }, { "epoch": 0.01, "learning_rate": 0.0009365079365079366, "loss": 0.2443, "theoretical_loss": 5.719877313665254, "tokens_seen": 30932992 }, { "epoch": 0.01, "learning_rate": 0.0009444444444444445, "loss": 0.2456, "theoretical_loss": 5.712130273128388, "tokens_seen": 31195136 }, { "epoch": 0.01, "learning_rate": 0.0009523809523809524, "loss": 0.2472, "theoretical_loss": 5.704466117138258, "tokens_seen": 31457280 }, { "epoch": 0.01, "learning_rate": 0.0009603174603174604, "loss": 0.2447, "theoretical_loss": 5.696883279221504, "tokens_seen": 31719424 }, { "epoch": 0.01, "learning_rate": 0.0009682539682539683, "loss": 0.2421, "theoretical_loss": 5.689380235145171, "tokens_seen": 31981568 }, { "epoch": 0.01, "learning_rate": 0.0009761904761904762, "loss": 0.2447, "theoretical_loss": 5.6819555014425305, "tokens_seen": 32243712 }, { "epoch": 0.01, "learning_rate": 0.000984126984126984, "loss": 0.2412, "theoretical_loss": 5.674607634001871, "tokens_seen": 32505856 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.012133671902120113, "objective/train/docs_used": 19239, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 4.836267948150635, "objective/train/original_loss": 4.836267471313477, "objective/train/theoretical_loss": 5.667335226715059, "objective/train/tokens_used": 53228000, "objective/train/value_avg": -0.0186767578125, "objective/train/value_loss": 0.0005454737693071365, "objective/train/value_max": -0.00572967529296875, "objective/train/value_min": -0.0872802734375, "objective/train/value_reward_corr": 0.07709606794972773, "objective/train/value_std": 0.0084075927734375, "objective/train/weight_avg": 1.0123944282531738, "objective/train/weighted_lm_loss": 4.902460098266602, "objective/train/weights_max": 1.077897310256958, "objective/train/weights_min": 0.3729293644428253, "theoretical_loss": 5.667335226715059, "tokens_seen": 32768000 }, { "epoch": 0.01, "learning_rate": 0.000992063492063492, "loss": 0.2475, "theoretical_loss": 5.667335226715059, "tokens_seen": 32768000 }, { "epoch": 0.01, "learning_rate": 0.001, "loss": 0.243, "theoretical_loss": 5.6601369101828904, "tokens_seen": 33030144 }, { "epoch": 0.01, "learning_rate": 0.0009999197560584176, "loss": 0.2423, "theoretical_loss": 5.6530113504744435, "tokens_seen": 33292288 }, { "epoch": 0.01, "learning_rate": 0.0009998395121168352, "loss": 0.2419, "theoretical_loss": 5.645957247937725, "tokens_seen": 33554432 }, { "epoch": 0.01, "learning_rate": 0.0009997592681752529, "loss": 0.2402, "theoretical_loss": 5.638973336059157, "tokens_seen": 33816576 }, { "epoch": 0.01, "learning_rate": 0.0009996790242336705, "loss": 0.235, "theoretical_loss": 5.632058380369512, "tokens_seen": 34078720 }, { "epoch": 0.01, "learning_rate": 0.0009995987802920879, "loss": 0.2355, "theoretical_loss": 5.625211177394046, "tokens_seen": 34340864 }, { "epoch": 0.01, "learning_rate": 0.0009995185363505057, "loss": 0.2349, "theoretical_loss": 5.618430553644782, "tokens_seen": 34603008 }, { "epoch": 0.01, "learning_rate": 0.000999438292408923, "loss": 0.2367, "theoretical_loss": 5.611715364652864, "tokens_seen": 34865152 }, { "epoch": 0.01, "learning_rate": 0.0009993580484673407, "loss": 0.2314, "theoretical_loss": 5.605064494039176, "tokens_seen": 35127296 }, { "epoch": 0.01, "learning_rate": 0.0009992778045257583, "loss": 0.2299, "theoretical_loss": 5.598476852621397, "tokens_seen": 35389440 }, { "epoch": 0.01, "learning_rate": 0.000999197560584176, "loss": 0.2361, "theoretical_loss": 5.591951377555809, "tokens_seen": 35651584 }, { "epoch": 0.01, "learning_rate": 0.0009991173166425935, "loss": 0.2362, "theoretical_loss": 5.585487031512276, "tokens_seen": 35913728 }, { "epoch": 0.01, "learning_rate": 0.0009990370727010112, "loss": 0.2358, "theoretical_loss": 5.579082801880871, "tokens_seen": 36175872 }, { "epoch": 0.01, "learning_rate": 0.0009989568287594288, "loss": 0.2332, "theoretical_loss": 5.572737700008718, "tokens_seen": 36438016 }, { "epoch": 0.01, "learning_rate": 0.0009988765848178462, "loss": 0.2354, "theoretical_loss": 5.56645076046569, "tokens_seen": 36700160 }, { "epoch": 0.01, "learning_rate": 0.0009987963408762638, "loss": 0.2326, "theoretical_loss": 5.5602210403376775, "tokens_seen": 36962304 }, { "epoch": 0.01, "learning_rate": 0.0009987160969346814, "loss": 0.2372, "theoretical_loss": 5.554047618546193, "tokens_seen": 37224448 }, { "epoch": 0.01, "learning_rate": 0.000998635852993099, "loss": 0.2413, "theoretical_loss": 5.547929595193182, "tokens_seen": 37486592 }, { "epoch": 0.01, "learning_rate": 0.0009985556090515166, "loss": 0.2345, "theoretical_loss": 5.5418660909298945, "tokens_seen": 37748736 }, { "epoch": 0.01, "learning_rate": 0.0009984753651099342, "loss": 0.2308, "theoretical_loss": 5.535856246348814, "tokens_seen": 38010880 }, { "epoch": 0.01, "learning_rate": 0.0009983951211683518, "loss": 0.2326, "theoretical_loss": 5.529899221397624, "tokens_seen": 38273024 }, { "epoch": 0.01, "learning_rate": 0.0009983148772267695, "loss": 0.2299, "theoretical_loss": 5.523994194814273, "tokens_seen": 38535168 }, { "epoch": 0.01, "learning_rate": 0.000998234633285187, "loss": 0.2315, "theoretical_loss": 5.518140363582252, "tokens_seen": 38797312 }, { "epoch": 0.01, "learning_rate": 0.0009981543893436047, "loss": 0.2336, "theoretical_loss": 5.512336942405216, "tokens_seen": 39059456 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.007620680145919323, "objective/train/docs_used": 21760, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 4.368919372558594, "objective/train/original_loss": 4.368919372558594, "objective/train/theoretical_loss": 5.506583163200142, "objective/train/tokens_used": 59781600, "objective/train/value_avg": -0.0172576904296875, "objective/train/value_loss": 0.0010155023774132133, "objective/train/value_max": -0.00572967529296875, "objective/train/value_min": -0.12042236328125, "objective/train/value_reward_corr": 0.19761336452653674, "objective/train/value_std": 0.0086517333984375, "objective/train/weight_avg": 1.0080623626708984, "objective/train/weighted_lm_loss": 4.408056735992432, "objective/train/weights_max": 1.1209726333618164, "objective/train/weights_min": 0.3737069368362427, "theoretical_loss": 5.506583163200142, "tokens_seen": 39321600 }, { "epoch": 0.01, "learning_rate": 0.000998074145402022, "loss": 0.2265, "theoretical_loss": 5.506583163200142, "tokens_seen": 39321600 }, { "epoch": 0.01, "learning_rate": 0.0009979939014604397, "loss": 0.2311, "theoretical_loss": 5.5008782746082625, "tokens_seen": 39583744 }, { "epoch": 0.01, "learning_rate": 0.0009979136575188573, "loss": 0.2287, "theoretical_loss": 5.495221541523011, "tokens_seen": 39845888 }, { "epoch": 0.01, "learning_rate": 0.000997833413577275, "loss": 0.2302, "theoretical_loss": 5.489612244634316, "tokens_seen": 40108032 }, { "epoch": 0.01, "learning_rate": 0.0009977531696356925, "loss": 0.2287, "theoretical_loss": 5.48404967998854, "tokens_seen": 40370176 }, { "epoch": 0.01, "learning_rate": 0.0009976729256941101, "loss": 0.2282, "theoretical_loss": 5.478533158563456, "tokens_seen": 40632320 }, { "epoch": 0.01, "learning_rate": 0.0009975926817525277, "loss": 0.2321, "theoretical_loss": 5.473062005857637, "tokens_seen": 40894464 }, { "epoch": 0.01, "learning_rate": 0.0009975124378109451, "loss": 0.2303, "theoretical_loss": 5.467635561493681, "tokens_seen": 41156608 }, { "epoch": 0.01, "learning_rate": 0.000997432193869363, "loss": 0.2289, "theoretical_loss": 5.462253178834744, "tokens_seen": 41418752 }, { "epoch": 0.01, "learning_rate": 0.0009973519499277804, "loss": 0.225, "theoretical_loss": 5.456914224613812, "tokens_seen": 41680896 }, { "epoch": 0.01, "learning_rate": 0.0009972717059861982, "loss": 0.228, "theoretical_loss": 5.451618078575256, "tokens_seen": 41943040 }, { "epoch": 0.01, "learning_rate": 0.0009971914620446156, "loss": 0.2249, "theoretical_loss": 5.446364133128155, "tokens_seen": 42205184 }, { "epoch": 0.01, "learning_rate": 0.0009971112181030332, "loss": 0.224, "theoretical_loss": 5.44115179301095, "tokens_seen": 42467328 }, { "epoch": 0.01, "learning_rate": 0.0009970309741614508, "loss": 0.2268, "theoretical_loss": 5.435980474966981, "tokens_seen": 42729472 }, { "epoch": 0.01, "learning_rate": 0.0009969507302198684, "loss": 0.223, "theoretical_loss": 5.430849607430501, "tokens_seen": 42991616 }, { "epoch": 0.01, "learning_rate": 0.000996870486278286, "loss": 0.2224, "theoretical_loss": 5.425758630222747, "tokens_seen": 43253760 }, { "epoch": 0.01, "learning_rate": 0.0009967902423367037, "loss": 0.227, "theoretical_loss": 5.42070699425771, "tokens_seen": 43515904 }, { "epoch": 0.01, "learning_rate": 0.0009967099983951213, "loss": 0.2264, "theoretical_loss": 5.415694161257225, "tokens_seen": 43778048 }, { "epoch": 0.01, "learning_rate": 0.0009966297544535387, "loss": 0.2205, "theoretical_loss": 5.410719603475034, "tokens_seen": 44040192 }, { "epoch": 0.01, "learning_rate": 0.0009965495105119565, "loss": 0.2239, "theoretical_loss": 5.405782803429483, "tokens_seen": 44302336 }, { "epoch": 0.01, "learning_rate": 0.0009964692665703739, "loss": 0.2202, "theoretical_loss": 5.400883253644551, "tokens_seen": 44564480 }, { "epoch": 0.01, "learning_rate": 0.0009963890226287915, "loss": 0.224, "theoretical_loss": 5.396020456398885, "tokens_seen": 44826624 }, { "epoch": 0.01, "learning_rate": 0.0009963087786872091, "loss": 0.217, "theoretical_loss": 5.391193923482547, "tokens_seen": 45088768 }, { "epoch": 0.01, "learning_rate": 0.0009962285347456267, "loss": 0.2206, "theoretical_loss": 5.386403175961223, "tokens_seen": 45350912 }, { "epoch": 0.01, "learning_rate": 0.0009961482908040443, "loss": 0.2205, "theoretical_loss": 5.381647743947578, "tokens_seen": 45613056 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.008024879731237888, "objective/train/docs_used": 24173, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 4.302799701690674, "objective/train/original_loss": 4.302799701690674, "objective/train/theoretical_loss": 5.37692716637954, "objective/train/tokens_used": 66335200, "objective/train/value_avg": -0.017547607421875, "objective/train/value_loss": 0.0007267643231898546, "objective/train/value_max": -0.0055999755859375, "objective/train/value_min": -0.1319580078125, "objective/train/value_reward_corr": 0.2775875654342026, "objective/train/value_std": 0.0094757080078125, "objective/train/weight_avg": 1.0083508491516113, "objective/train/weighted_lm_loss": 4.344386577606201, "objective/train/weights_max": 1.0802388191223145, "objective/train/weights_min": 0.23186293244361877, "theoretical_loss": 5.37692716637954, "tokens_seen": 45875200 }, { "epoch": 0.01, "learning_rate": 0.000996068046862462, "loss": 0.223, "theoretical_loss": 5.37692716637954, "tokens_seen": 45875200 }, { "epoch": 0.01, "learning_rate": 0.0009959878029208796, "loss": 0.2202, "theoretical_loss": 5.372240990805237, "tokens_seen": 46137344 }, { "epoch": 0.01, "learning_rate": 0.0009959075589792972, "loss": 0.2182, "theoretical_loss": 5.367588773174377, "tokens_seen": 46399488 }, { "epoch": 0.01, "learning_rate": 0.0009958273150377146, "loss": 0.2236, "theoretical_loss": 5.36297007763582, "tokens_seen": 46661632 }, { "epoch": 0.01, "learning_rate": 0.0009957470710961322, "loss": 0.2225, "theoretical_loss": 5.358384476341126, "tokens_seen": 46923776 }, { "epoch": 0.01, "learning_rate": 0.0009956668271545498, "loss": 0.2186, "theoretical_loss": 5.353831549253895, "tokens_seen": 47185920 }, { "epoch": 0.01, "learning_rate": 0.0009955865832129674, "loss": 0.2205, "theoretical_loss": 5.349310883964664, "tokens_seen": 47448064 }, { "epoch": 0.01, "learning_rate": 0.000995506339271385, "loss": 0.2193, "theoretical_loss": 5.344822075511196, "tokens_seen": 47710208 }, { "epoch": 0.01, "learning_rate": 0.0009954260953298026, "loss": 0.2176, "theoretical_loss": 5.340364726203955, "tokens_seen": 47972352 }, { "epoch": 0.01, "learning_rate": 0.0009953458513882203, "loss": 0.2151, "theoretical_loss": 5.3359384454566055, "tokens_seen": 48234496 }, { "epoch": 0.01, "learning_rate": 0.0009952656074466376, "loss": 0.2138, "theoretical_loss": 5.331542849621357, "tokens_seen": 48496640 }, { "epoch": 0.01, "learning_rate": 0.0009951853635050555, "loss": 0.2168, "theoretical_loss": 5.327177561828993, "tokens_seen": 48758784 }, { "epoch": 0.01, "learning_rate": 0.0009951051195634729, "loss": 0.2189, "theoretical_loss": 5.32284221183342, "tokens_seen": 49020928 }, { "epoch": 0.01, "learning_rate": 0.0009950248756218905, "loss": 0.2206, "theoretical_loss": 5.318536435860599, "tokens_seen": 49283072 }, { "epoch": 0.02, "learning_rate": 0.000994944631680308, "loss": 0.2147, "theoretical_loss": 5.314259876461705, "tokens_seen": 49545216 }, { "epoch": 0.02, "learning_rate": 0.0009948643877387257, "loss": 0.2176, "theoretical_loss": 5.310012182370359, "tokens_seen": 49807360 }, { "epoch": 0.02, "learning_rate": 0.0009947841437971433, "loss": 0.214, "theoretical_loss": 5.305793008363841, "tokens_seen": 50069504 }, { "epoch": 0.02, "learning_rate": 0.000994703899855561, "loss": 0.2142, "theoretical_loss": 5.301602015128104, "tokens_seen": 50331648 }, { "epoch": 0.02, "learning_rate": 0.0009946236559139785, "loss": 0.2118, "theoretical_loss": 5.297438869126498, "tokens_seen": 50593792 }, { "epoch": 0.02, "learning_rate": 0.0009945434119723962, "loss": 0.2195, "theoretical_loss": 5.293303242472074, "tokens_seen": 50855936 }, { "epoch": 0.02, "learning_rate": 0.0009944631680308138, "loss": 0.2132, "theoretical_loss": 5.289194812803347, "tokens_seen": 51118080 }, { "epoch": 0.02, "learning_rate": 0.0009943829240892312, "loss": 0.2113, "theoretical_loss": 5.285113263163414, "tokens_seen": 51380224 }, { "epoch": 0.02, "learning_rate": 0.000994302680147649, "loss": 0.2133, "theoretical_loss": 5.2810582818823235, "tokens_seen": 51642368 }, { "epoch": 0.02, "learning_rate": 0.0009942224362060664, "loss": 0.2144, "theoretical_loss": 5.27702956246258, "tokens_seen": 51904512 }, { "epoch": 0.02, "learning_rate": 0.000994142192264484, "loss": 0.2125, "theoretical_loss": 5.273026803467695, "tokens_seen": 52166656 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.007762981578707695, "objective/train/docs_used": 26541, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 4.0233917236328125, "objective/train/original_loss": 4.023391246795654, "objective/train/theoretical_loss": 5.269049708413682, "objective/train/tokens_used": 72888800, "objective/train/value_avg": -0.0139312744140625, "objective/train/value_loss": 0.0003585839585866779, "objective/train/value_max": -0.004230499267578125, "objective/train/value_min": -0.0765380859375, "objective/train/value_reward_corr": 0.23784686314537432, "objective/train/value_std": 0.006893157958984375, "objective/train/weight_avg": 1.0079320669174194, "objective/train/weighted_lm_loss": 4.059372901916504, "objective/train/weights_max": 1.0704232454299927, "objective/train/weights_min": 0.3713054358959198, "theoretical_loss": 5.269049708413682, "tokens_seen": 52428800 }, { "epoch": 0.02, "learning_rate": 0.0009940619483229016, "loss": 0.2108, "theoretical_loss": 5.269049708413682, "tokens_seen": 52428800 }, { "epoch": 0.02, "learning_rate": 0.0009939817043813192, "loss": 0.2138, "theoretical_loss": 5.265097985663418, "tokens_seen": 52690944 }, { "epoch": 0.02, "learning_rate": 0.0009939014604397368, "loss": 0.2121, "theoretical_loss": 5.261171348323755, "tokens_seen": 52953088 }, { "epoch": 0.02, "learning_rate": 0.0009938212164981545, "loss": 0.2093, "theoretical_loss": 5.257269514145324, "tokens_seen": 53215232 }, { "epoch": 0.02, "learning_rate": 0.000993740972556572, "loss": 0.2068, "theoretical_loss": 5.2533922054249365, "tokens_seen": 53477376 }, { "epoch": 0.02, "learning_rate": 0.0009936607286149895, "loss": 0.2108, "theoretical_loss": 5.2495391489104986, "tokens_seen": 53739520 }, { "epoch": 0.02, "learning_rate": 0.0009935804846734073, "loss": 0.2142, "theoretical_loss": 5.24571007570837, "tokens_seen": 54001664 }, { "epoch": 0.02, "learning_rate": 0.0009935002407318247, "loss": 0.2101, "theoretical_loss": 5.2419047211930865, "tokens_seen": 54263808 }, { "epoch": 0.02, "learning_rate": 0.0009934199967902423, "loss": 0.2067, "theoretical_loss": 5.238122824919387, "tokens_seen": 54525952 }, { "epoch": 0.02, "learning_rate": 0.00099333975284866, "loss": 0.2105, "theoretical_loss": 5.234364130536457, "tokens_seen": 54788096 }, { "epoch": 0.02, "learning_rate": 0.0009932595089070775, "loss": 0.2097, "theoretical_loss": 5.230628385704337, "tokens_seen": 55050240 }, { "epoch": 0.02, "learning_rate": 0.0009931792649654951, "loss": 0.208, "theoretical_loss": 5.2269153420124255, "tokens_seen": 55312384 }, { "epoch": 0.02, "learning_rate": 0.0009930990210239128, "loss": 0.2087, "theoretical_loss": 5.223224754900014, "tokens_seen": 55574528 }, { "epoch": 0.02, "learning_rate": 0.0009930187770823304, "loss": 0.2098, "theoretical_loss": 5.219556383578795, "tokens_seen": 55836672 }, { "epoch": 0.02, "learning_rate": 0.000992938533140748, "loss": 0.2064, "theoretical_loss": 5.215909990957291, "tokens_seen": 56098816 }, { "epoch": 0.02, "learning_rate": 0.0009928582891991654, "loss": 0.2073, "theoretical_loss": 5.212285343567135, "tokens_seen": 56360960 }, { "epoch": 0.02, "learning_rate": 0.000992778045257583, "loss": 0.2064, "theoretical_loss": 5.208682211491157, "tokens_seen": 56623104 }, { "epoch": 0.02, "learning_rate": 0.0009926978013160006, "loss": 0.2043, "theoretical_loss": 5.205100368293225, "tokens_seen": 56885248 }, { "epoch": 0.02, "learning_rate": 0.0009926175573744182, "loss": 0.2038, "theoretical_loss": 5.201539590949796, "tokens_seen": 57147392 }, { "epoch": 0.02, "learning_rate": 0.0009925373134328358, "loss": 0.2055, "theoretical_loss": 5.1979996597831, "tokens_seen": 57409536 }, { "epoch": 0.02, "learning_rate": 0.0009924570694912534, "loss": 0.2043, "theoretical_loss": 5.1944803583959525, "tokens_seen": 57671680 }, { "epoch": 0.02, "learning_rate": 0.000992376825549671, "loss": 0.2057, "theoretical_loss": 5.190981473608112, "tokens_seen": 57933824 }, { "epoch": 0.02, "learning_rate": 0.0009922965816080887, "loss": 0.2065, "theoretical_loss": 5.18750279539416, "tokens_seen": 58195968 }, { "epoch": 0.02, "learning_rate": 0.0009922163376665063, "loss": 0.2025, "theoretical_loss": 5.184044116822849, "tokens_seen": 58458112 }, { "epoch": 0.02, "learning_rate": 0.0009921360937249237, "loss": 0.2079, "theoretical_loss": 5.1806052339978965, "tokens_seen": 58720256 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.005310404114425182, "objective/train/docs_used": 29071, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.910093069076538, "objective/train/original_loss": 3.910093069076538, "objective/train/theoretical_loss": 5.1771859460001455, "objective/train/tokens_used": 79442400, "objective/train/value_avg": -0.01593017578125, "objective/train/value_loss": 0.0005049873143434525, "objective/train/value_max": -0.004070281982421875, "objective/train/value_min": -0.09368896484375, "objective/train/value_reward_corr": 0.3727184924290943, "objective/train/value_std": 0.00925445556640625, "objective/train/weight_avg": 1.005548357963562, "objective/train/weighted_lm_loss": 3.935572624206543, "objective/train/weights_max": 1.0910027027130127, "objective/train/weights_min": 0.37153780460357666, "theoretical_loss": 5.1771859460001455, "tokens_seen": 58982400 }, { "epoch": 0.02, "learning_rate": 0.0009920558497833415, "loss": 0.2002, "theoretical_loss": 5.1771859460001455, "tokens_seen": 58982400 }, { "epoch": 0.02, "learning_rate": 0.000991975605841759, "loss": 0.2064, "theoretical_loss": 5.1737860548311065, "tokens_seen": 59244544 }, { "epoch": 0.02, "learning_rate": 0.0009918953619001765, "loss": 0.204, "theoretical_loss": 5.170405365357794, "tokens_seen": 59506688 }, { "epoch": 0.02, "learning_rate": 0.0009918151179585941, "loss": 0.2007, "theoretical_loss": 5.167043685258852, "tokens_seen": 59768832 }, { "epoch": 0.02, "learning_rate": 0.0009917348740170117, "loss": 0.2029, "theoretical_loss": 5.163700824971922, "tokens_seen": 60030976 }, { "epoch": 0.02, "learning_rate": 0.0009916546300754293, "loss": 0.2062, "theoretical_loss": 5.160376597642223, "tokens_seen": 60293120 }, { "epoch": 0.02, "learning_rate": 0.000991574386133847, "loss": 0.208, "theoretical_loss": 5.157070819072301, "tokens_seen": 60555264 }, { "epoch": 0.02, "learning_rate": 0.0009914941421922646, "loss": 0.2057, "theoretical_loss": 5.153783307672935, "tokens_seen": 60817408 }, { "epoch": 0.02, "learning_rate": 0.000991413898250682, "loss": 0.2046, "theoretical_loss": 5.150513884415149, "tokens_seen": 61079552 }, { "epoch": 0.02, "learning_rate": 0.0009913336543090998, "loss": 0.2011, "theoretical_loss": 5.14726237278331, "tokens_seen": 61341696 }, { "epoch": 0.02, "learning_rate": 0.0009912534103675172, "loss": 0.2057, "theoretical_loss": 5.144028598729285, "tokens_seen": 61603840 }, { "epoch": 0.02, "learning_rate": 0.0009911731664259348, "loss": 0.2008, "theoretical_loss": 5.140812390627624, "tokens_seen": 61865984 }, { "epoch": 0.02, "learning_rate": 0.0009910929224843524, "loss": 0.2033, "theoretical_loss": 5.137613579231737, "tokens_seen": 62128128 }, { "epoch": 0.02, "learning_rate": 0.00099101267854277, "loss": 0.2025, "theoretical_loss": 5.134431997631053, "tokens_seen": 62390272 }, { "epoch": 0.02, "learning_rate": 0.0009909324346011876, "loss": 0.1987, "theoretical_loss": 5.1312674812091235, "tokens_seen": 62652416 }, { "epoch": 0.02, "learning_rate": 0.0009908521906596053, "loss": 0.1978, "theoretical_loss": 5.128119867602646, "tokens_seen": 62914560 }, { "epoch": 0.02, "learning_rate": 0.0009907719467180229, "loss": 0.2007, "theoretical_loss": 5.124988996661393, "tokens_seen": 63176704 }, { "epoch": 0.02, "learning_rate": 0.0009906917027764405, "loss": 0.1967, "theoretical_loss": 5.121874710409012, "tokens_seen": 63438848 }, { "epoch": 0.02, "learning_rate": 0.000990611458834858, "loss": 0.1992, "theoretical_loss": 5.118776853004677, "tokens_seen": 63700992 }, { "epoch": 0.02, "learning_rate": 0.0009905312148932755, "loss": 0.1973, "theoretical_loss": 5.115695270705579, "tokens_seen": 63963136 }, { "epoch": 0.02, "learning_rate": 0.000990450970951693, "loss": 0.1952, "theoretical_loss": 5.112629811830217, "tokens_seen": 64225280 }, { "epoch": 0.02, "learning_rate": 0.0009903707270101107, "loss": 0.1983, "theoretical_loss": 5.10958032672248, "tokens_seen": 64487424 }, { "epoch": 0.02, "learning_rate": 0.0009902904830685283, "loss": 0.1941, "theoretical_loss": 5.106546667716508, "tokens_seen": 64749568 }, { "epoch": 0.02, "learning_rate": 0.000990210239126946, "loss": 0.2008, "theoretical_loss": 5.103528689102281, "tokens_seen": 65011712 }, { "epoch": 0.02, "learning_rate": 0.0009901299951853636, "loss": 0.1951, "theoretical_loss": 5.100526247091967, "tokens_seen": 65273856 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.008375543169677258, "objective/train/docs_used": 31526, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.9859790802001953, "objective/train/original_loss": 3.9859793186187744, "objective/train/theoretical_loss": 5.097539199786951, "objective/train/tokens_used": 85996000, "objective/train/value_avg": -0.0173187255859375, "objective/train/value_loss": 0.0005732561694458127, "objective/train/value_max": -0.004886627197265625, "objective/train/value_min": -0.1669921875, "objective/train/value_reward_corr": 0.3654351277123077, "objective/train/value_std": 0.0103912353515625, "objective/train/weight_avg": 1.0086482763290405, "objective/train/weighted_lm_loss": 4.023629665374756, "objective/train/weights_max": 1.1241106986999512, "objective/train/weights_min": 0.49856844544410706, "theoretical_loss": 5.097539199786951, "tokens_seen": 65536000 }, { "epoch": 0.02, "learning_rate": 0.0009900497512437812, "loss": 0.1982, "theoretical_loss": 5.097539199786951, "tokens_seen": 65536000 }, { "epoch": 0.02, "learning_rate": 0.0009899695073021988, "loss": 0.1987, "theoretical_loss": 5.094567407145588, "tokens_seen": 65798144 }, { "epoch": 0.02, "learning_rate": 0.0009898892633606162, "loss": 0.1958, "theoretical_loss": 5.09161073095161, "tokens_seen": 66060288 }, { "epoch": 0.02, "learning_rate": 0.000989809019419034, "loss": 0.1928, "theoretical_loss": 5.0886690347832015, "tokens_seen": 66322432 }, { "epoch": 0.02, "learning_rate": 0.0009897287754774514, "loss": 0.1945, "theoretical_loss": 5.0857421839827275, "tokens_seen": 66584576 }, { "epoch": 0.02, "learning_rate": 0.000989648531535869, "loss": 0.1912, "theoretical_loss": 5.082830045627072, "tokens_seen": 66846720 }, { "epoch": 0.02, "learning_rate": 0.0009895682875942866, "loss": 0.1962, "theoretical_loss": 5.079932488498602, "tokens_seen": 67108864 }, { "epoch": 0.02, "learning_rate": 0.0009894880436527042, "loss": 0.1935, "theoretical_loss": 5.077049383056725, "tokens_seen": 67371008 }, { "epoch": 0.02, "learning_rate": 0.0009894077997111218, "loss": 0.1957, "theoretical_loss": 5.074180601410026, "tokens_seen": 67633152 }, { "epoch": 0.02, "learning_rate": 0.0009893275557695395, "loss": 0.1884, "theoretical_loss": 5.0713260172889845, "tokens_seen": 67895296 }, { "epoch": 0.02, "learning_rate": 0.000989247311827957, "loss": 0.1908, "theoretical_loss": 5.068485506019231, "tokens_seen": 68157440 }, { "epoch": 0.02, "learning_rate": 0.0009891670678863745, "loss": 0.1931, "theoretical_loss": 5.06565894449535, "tokens_seen": 68419584 }, { "epoch": 0.02, "learning_rate": 0.0009890868239447923, "loss": 0.1936, "theoretical_loss": 5.06284621115523, "tokens_seen": 68681728 }, { "epoch": 0.02, "learning_rate": 0.0009890065800032097, "loss": 0.198, "theoretical_loss": 5.060047185954893, "tokens_seen": 68943872 }, { "epoch": 0.02, "learning_rate": 0.0009889263360616273, "loss": 0.1926, "theoretical_loss": 5.057261750343864, "tokens_seen": 69206016 }, { "epoch": 0.02, "learning_rate": 0.000988846092120045, "loss": 0.1934, "theoretical_loss": 5.0544897872410095, "tokens_seen": 69468160 }, { "epoch": 0.02, "learning_rate": 0.0009887658481784625, "loss": 0.192, "theoretical_loss": 5.051731181010866, "tokens_seen": 69730304 }, { "epoch": 0.02, "learning_rate": 0.0009886856042368801, "loss": 0.1905, "theoretical_loss": 5.048985817440432, "tokens_seen": 69992448 }, { "epoch": 0.02, "learning_rate": 0.0009886053602952978, "loss": 0.1939, "theoretical_loss": 5.046253583716425, "tokens_seen": 70254592 }, { "epoch": 0.02, "learning_rate": 0.0009885251163537154, "loss": 0.1908, "theoretical_loss": 5.043534368402973, "tokens_seen": 70516736 }, { "epoch": 0.02, "learning_rate": 0.000988444872412133, "loss": 0.19, "theoretical_loss": 5.040828061419762, "tokens_seen": 70778880 }, { "epoch": 0.02, "learning_rate": 0.0009883646284705506, "loss": 0.1928, "theoretical_loss": 5.038134554020587, "tokens_seen": 71041024 }, { "epoch": 0.02, "learning_rate": 0.000988284384528968, "loss": 0.1893, "theoretical_loss": 5.03545373877234, "tokens_seen": 71303168 }, { "epoch": 0.02, "learning_rate": 0.0009882041405873858, "loss": 0.1902, "theoretical_loss": 5.032785509534391, "tokens_seen": 71565312 }, { "epoch": 0.02, "learning_rate": 0.0009881238966458032, "loss": 0.1882, "theoretical_loss": 5.030129761438376, "tokens_seen": 71827456 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.006676941178739071, "objective/train/docs_used": 33987, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.692817211151123, "objective/train/original_loss": 3.692817211151123, "objective/train/theoretical_loss": 5.0274863908683685, "objective/train/tokens_used": 92549600, "objective/train/value_avg": -0.01511383056640625, "objective/train/value_loss": 0.0004530175356194377, "objective/train/value_max": -0.00426483154296875, "objective/train/value_min": -0.100341796875, "objective/train/value_reward_corr": 0.2687911557831162, "objective/train/value_std": 0.00835418701171875, "objective/train/weight_avg": 1.0068917274475098, "objective/train/weighted_lm_loss": 3.7194132804870605, "objective/train/weights_max": 1.0913752317428589, "objective/train/weights_min": 0.37156614661216736, "theoretical_loss": 5.0274863908683685, "tokens_seen": 72089600 }, { "epoch": 0.02, "learning_rate": 0.0009880436527042208, "loss": 0.1875, "theoretical_loss": 5.0274863908683685, "tokens_seen": 72089600 }, { "epoch": 0.02, "learning_rate": 0.0009879634087626384, "loss": 0.1911, "theoretical_loss": 5.024855295441432, "tokens_seen": 72351744 }, { "epoch": 0.02, "learning_rate": 0.000987883164821056, "loss": 0.189, "theoretical_loss": 5.022236373988544, "tokens_seen": 72613888 }, { "epoch": 0.02, "learning_rate": 0.0009878029208794737, "loss": 0.1874, "theoretical_loss": 5.01962952653588, "tokens_seen": 72876032 }, { "epoch": 0.02, "learning_rate": 0.0009877226769378913, "loss": 0.1878, "theoretical_loss": 5.017034654286462, "tokens_seen": 73138176 }, { "epoch": 0.02, "learning_rate": 0.000987642432996309, "loss": 0.19, "theoretical_loss": 5.0144516596021385, "tokens_seen": 73400320 }, { "epoch": 0.02, "learning_rate": 0.0009875621890547263, "loss": 0.1827, "theoretical_loss": 5.011880445985916, "tokens_seen": 73662464 }, { "epoch": 0.02, "learning_rate": 0.000987481945113144, "loss": 0.1884, "theoretical_loss": 5.009320918064615, "tokens_seen": 73924608 }, { "epoch": 0.02, "learning_rate": 0.0009874017011715615, "loss": 0.1884, "theoretical_loss": 5.006772981571855, "tokens_seen": 74186752 }, { "epoch": 0.02, "learning_rate": 0.0009873214572299791, "loss": 0.1866, "theoretical_loss": 5.004236543331345, "tokens_seen": 74448896 }, { "epoch": 0.02, "learning_rate": 0.0009872412132883967, "loss": 0.1851, "theoretical_loss": 5.001711511240506, "tokens_seen": 74711040 }, { "epoch": 0.02, "learning_rate": 0.0009871609693468144, "loss": 0.1864, "theoretical_loss": 4.999197794254371, "tokens_seen": 74973184 }, { "epoch": 0.02, "learning_rate": 0.000987080725405232, "loss": 0.1857, "theoretical_loss": 4.9966953023697975, "tokens_seen": 75235328 }, { "epoch": 0.02, "learning_rate": 0.0009870004814636496, "loss": 0.1855, "theoretical_loss": 4.994203946609964, "tokens_seen": 75497472 }, { "epoch": 0.02, "learning_rate": 0.000986920237522067, "loss": 0.1828, "theoretical_loss": 4.991723639009154, "tokens_seen": 75759616 }, { "epoch": 0.02, "learning_rate": 0.0009868399935804848, "loss": 0.1839, "theoretical_loss": 4.989254292597813, "tokens_seen": 76021760 }, { "epoch": 0.02, "learning_rate": 0.0009867597496389022, "loss": 0.1871, "theoretical_loss": 4.986795821387878, "tokens_seen": 76283904 }, { "epoch": 0.02, "learning_rate": 0.0009866795056973198, "loss": 0.1852, "theoretical_loss": 4.984348140358374, "tokens_seen": 76546048 }, { "epoch": 0.02, "learning_rate": 0.0009865992617557374, "loss": 0.1871, "theoretical_loss": 4.981911165441273, "tokens_seen": 76808192 }, { "epoch": 0.02, "learning_rate": 0.000986519017814155, "loss": 0.1845, "theoretical_loss": 4.979484813507599, "tokens_seen": 77070336 }, { "epoch": 0.02, "learning_rate": 0.0009864387738725726, "loss": 0.1835, "theoretical_loss": 4.977069002353792, "tokens_seen": 77332480 }, { "epoch": 0.02, "learning_rate": 0.0009863585299309903, "loss": 0.1813, "theoretical_loss": 4.974663650688306, "tokens_seen": 77594624 }, { "epoch": 0.02, "learning_rate": 0.0009862782859894079, "loss": 0.1833, "theoretical_loss": 4.972268678118454, "tokens_seen": 77856768 }, { "epoch": 0.02, "learning_rate": 0.0009861980420478253, "loss": 0.1796, "theoretical_loss": 4.969884005137479, "tokens_seen": 78118912 }, { "epoch": 0.02, "learning_rate": 0.000986117798106243, "loss": 0.1841, "theoretical_loss": 4.967509553111862, "tokens_seen": 78381056 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.007857042364776134, "objective/train/docs_used": 36308, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.7754344940185547, "objective/train/original_loss": 3.775434970855713, "objective/train/theoretical_loss": 4.96514524426884, "objective/train/tokens_used": 99103200, "objective/train/value_avg": -0.01514434814453125, "objective/train/value_loss": 0.0004333880788180977, "objective/train/value_max": -0.003795623779296875, "objective/train/value_min": -0.1292724609375, "objective/train/value_reward_corr": 0.27335943213418523, "objective/train/value_std": 0.0078125, "objective/train/weight_avg": 1.0080602169036865, "objective/train/weighted_lm_loss": 3.8082289695739746, "objective/train/weights_max": 1.1106642484664917, "objective/train/weights_min": 0.3715236186981201, "theoretical_loss": 4.96514524426884, "tokens_seen": 78643200 }, { "epoch": 0.02, "learning_rate": 0.0009860375541646605, "loss": 0.1803, "theoretical_loss": 4.96514524426884, "tokens_seen": 78643200 }, { "epoch": 0.02, "learning_rate": 0.0009859573102230783, "loss": 0.1828, "theoretical_loss": 4.962791001684167, "tokens_seen": 78905344 }, { "epoch": 0.02, "learning_rate": 0.0009858770662814957, "loss": 0.1812, "theoretical_loss": 4.960446749270055, "tokens_seen": 79167488 }, { "epoch": 0.02, "learning_rate": 0.0009857968223399133, "loss": 0.1833, "theoretical_loss": 4.958112411763365, "tokens_seen": 79429632 }, { "epoch": 0.02, "learning_rate": 0.000985716578398331, "loss": 0.1819, "theoretical_loss": 4.955787914713962, "tokens_seen": 79691776 }, { "epoch": 0.02, "learning_rate": 0.0009856363344567486, "loss": 0.1828, "theoretical_loss": 4.953473184473312, "tokens_seen": 79953920 }, { "epoch": 0.02, "learning_rate": 0.0009855560905151662, "loss": 0.1815, "theoretical_loss": 4.951168148183246, "tokens_seen": 80216064 }, { "epoch": 0.02, "learning_rate": 0.0009854758465735838, "loss": 0.1808, "theoretical_loss": 4.948872733764926, "tokens_seen": 80478208 }, { "epoch": 0.02, "learning_rate": 0.0009853956026320014, "loss": 0.1792, "theoretical_loss": 4.946586869908014, "tokens_seen": 80740352 }, { "epoch": 0.02, "learning_rate": 0.0009853153586904188, "loss": 0.1821, "theoretical_loss": 4.944310486060004, "tokens_seen": 81002496 }, { "epoch": 0.02, "learning_rate": 0.0009852351147488366, "loss": 0.1818, "theoretical_loss": 4.942043512415751, "tokens_seen": 81264640 }, { "epoch": 0.02, "learning_rate": 0.000985154870807254, "loss": 0.1836, "theoretical_loss": 4.939785879907176, "tokens_seen": 81526784 }, { "epoch": 0.02, "learning_rate": 0.0009850746268656716, "loss": 0.1805, "theoretical_loss": 4.937537520193139, "tokens_seen": 81788928 }, { "epoch": 0.02, "learning_rate": 0.0009849943829240892, "loss": 0.181, "theoretical_loss": 4.93529836564949, "tokens_seen": 82051072 }, { "epoch": 0.02, "learning_rate": 0.0009849141389825069, "loss": 0.1797, "theoretical_loss": 4.933068349359283, "tokens_seen": 82313216 }, { "epoch": 0.03, "learning_rate": 0.0009848338950409245, "loss": 0.1773, "theoretical_loss": 4.93084740510316, "tokens_seen": 82575360 }, { "epoch": 0.03, "learning_rate": 0.000984753651099342, "loss": 0.1776, "theoretical_loss": 4.928635467349885, "tokens_seen": 82837504 }, { "epoch": 0.03, "learning_rate": 0.0009846734071577597, "loss": 0.1764, "theoretical_loss": 4.92643247124705, "tokens_seen": 83099648 }, { "epoch": 0.03, "learning_rate": 0.0009845931632161773, "loss": 0.1805, "theoretical_loss": 4.924238352611924, "tokens_seen": 83361792 }, { "epoch": 0.03, "learning_rate": 0.0009845129192745947, "loss": 0.1789, "theoretical_loss": 4.922053047922455, "tokens_seen": 83623936 }, { "epoch": 0.03, "learning_rate": 0.0009844326753330123, "loss": 0.1756, "theoretical_loss": 4.919876494308432, "tokens_seen": 83886080 }, { "epoch": 0.03, "learning_rate": 0.00098435243139143, "loss": 0.1804, "theoretical_loss": 4.917708629542775, "tokens_seen": 84148224 }, { "epoch": 0.03, "learning_rate": 0.0009842721874498475, "loss": 0.1778, "theoretical_loss": 4.915549392032985, "tokens_seen": 84410368 }, { "epoch": 0.03, "learning_rate": 0.0009841919435082652, "loss": 0.1807, "theoretical_loss": 4.913398720812719, "tokens_seen": 84672512 }, { "epoch": 0.03, "learning_rate": 0.0009841116995666828, "loss": 0.1762, "theoretical_loss": 4.9112565555335115, "tokens_seen": 84934656 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.008176760748028755, "objective/train/docs_used": 38678, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.4570322036743164, "objective/train/original_loss": 3.4570322036743164, "objective/train/theoretical_loss": 4.909122836456632, "objective/train/tokens_used": 105656800, "objective/train/value_avg": -0.0123748779296875, "objective/train/value_loss": 0.00042682766797952354, "objective/train/value_max": -0.00313568115234375, "objective/train/value_min": -0.06854248046875, "objective/train/value_reward_corr": 0.12430929764516488, "objective/train/value_std": 0.005199432373046875, "objective/train/weight_avg": 1.0083661079406738, "objective/train/weighted_lm_loss": 3.4894821643829346, "objective/train/weights_max": 1.0679434537887573, "objective/train/weights_min": 0.3725539743900299, "theoretical_loss": 4.909122836456632, "tokens_seen": 85196800 }, { "epoch": 0.03, "learning_rate": 0.0009840314556251004, "loss": 0.1783, "theoretical_loss": 4.909122836456632, "tokens_seen": 85196800 }, { "epoch": 0.03, "learning_rate": 0.0009839512116835178, "loss": 0.1773, "theoretical_loss": 4.906997504445066, "tokens_seen": 85458944 }, { "epoch": 0.03, "learning_rate": 0.0009838709677419356, "loss": 0.1792, "theoretical_loss": 4.904880500955633, "tokens_seen": 85721088 }, { "epoch": 0.03, "learning_rate": 0.000983790723800353, "loss": 0.1752, "theoretical_loss": 4.90277176803123, "tokens_seen": 85983232 }, { "epoch": 0.03, "learning_rate": 0.0009837104798587706, "loss": 0.1747, "theoretical_loss": 4.9006712482931984, "tokens_seen": 86245376 }, { "epoch": 0.03, "learning_rate": 0.0009836302359171882, "loss": 0.176, "theoretical_loss": 4.89857888493381, "tokens_seen": 86507520 }, { "epoch": 0.03, "learning_rate": 0.0009835499919756058, "loss": 0.1777, "theoretical_loss": 4.896494621708882, "tokens_seen": 86769664 }, { "epoch": 0.03, "learning_rate": 0.0009834697480340234, "loss": 0.1748, "theoretical_loss": 4.8944184029305, "tokens_seen": 87031808 }, { "epoch": 0.03, "learning_rate": 0.000983389504092441, "loss": 0.1762, "theoretical_loss": 4.892350173459863, "tokens_seen": 87293952 }, { "epoch": 0.03, "learning_rate": 0.0009833092601508587, "loss": 0.1736, "theoretical_loss": 4.890289878700239, "tokens_seen": 87556096 }, { "epoch": 0.03, "learning_rate": 0.0009832290162092763, "loss": 0.1736, "theoretical_loss": 4.888237464590028, "tokens_seen": 87818240 }, { "epoch": 0.03, "learning_rate": 0.000983148772267694, "loss": 0.1769, "theoretical_loss": 4.8861928775959464, "tokens_seen": 88080384 }, { "epoch": 0.03, "learning_rate": 0.0009830685283261113, "loss": 0.1752, "theoretical_loss": 4.884156064706302, "tokens_seen": 88342528 }, { "epoch": 0.03, "learning_rate": 0.0009829882843845291, "loss": 0.1744, "theoretical_loss": 4.882126973424384, "tokens_seen": 88604672 }, { "epoch": 0.03, "learning_rate": 0.0009829080404429465, "loss": 0.1731, "theoretical_loss": 4.880105551761961, "tokens_seen": 88866816 }, { "epoch": 0.03, "learning_rate": 0.0009828277965013641, "loss": 0.1783, "theoretical_loss": 4.87809174823286, "tokens_seen": 89128960 }, { "epoch": 0.03, "learning_rate": 0.0009827475525597817, "loss": 0.1719, "theoretical_loss": 4.876085511846673, "tokens_seen": 89391104 }, { "epoch": 0.03, "learning_rate": 0.0009826673086181994, "loss": 0.1769, "theoretical_loss": 4.874086792102535, "tokens_seen": 89653248 }, { "epoch": 0.03, "learning_rate": 0.000982587064676617, "loss": 0.1751, "theoretical_loss": 4.872095538983015, "tokens_seen": 89915392 }, { "epoch": 0.03, "learning_rate": 0.0009825068207350346, "loss": 0.1779, "theoretical_loss": 4.870111702948094, "tokens_seen": 90177536 }, { "epoch": 0.03, "learning_rate": 0.0009824265767934522, "loss": 0.1747, "theoretical_loss": 4.868135234929232, "tokens_seen": 90439680 }, { "epoch": 0.03, "learning_rate": 0.0009823463328518696, "loss": 0.1749, "theoretical_loss": 4.866166086323535, "tokens_seen": 90701824 }, { "epoch": 0.03, "learning_rate": 0.0009822660889102874, "loss": 0.1782, "theoretical_loss": 4.864204208988003, "tokens_seen": 90963968 }, { "epoch": 0.03, "learning_rate": 0.0009821858449687048, "loss": 0.1738, "theoretical_loss": 4.86224955523387, "tokens_seen": 91226112 }, { "epoch": 0.03, "learning_rate": 0.0009821056010271224, "loss": 0.1774, "theoretical_loss": 4.860302077821023, "tokens_seen": 91488256 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.005717393010854721, "objective/train/docs_used": 40748, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.387242317199707, "objective/train/original_loss": 3.387242555618286, "objective/train/theoretical_loss": 4.858361729952518, "objective/train/tokens_used": 112210400, "objective/train/value_avg": -0.01448822021484375, "objective/train/value_loss": 0.0005706871743313968, "objective/train/value_max": -0.0030155181884765625, "objective/train/value_min": -0.09588623046875, "objective/train/value_reward_corr": 0.3036400333241696, "objective/train/value_std": 0.007572174072265625, "objective/train/weight_avg": 1.0059770345687866, "objective/train/weighted_lm_loss": 3.4078683853149414, "objective/train/weights_max": 1.0813897848129272, "objective/train/weights_min": 0.37298911809921265, "theoretical_loss": 4.858361729952518, "tokens_seen": 91750400 }, { "epoch": 0.03, "learning_rate": 0.00098202535708554, "loss": 0.1731, "theoretical_loss": 4.858361729952518, "tokens_seen": 91750400 }, { "epoch": 0.03, "learning_rate": 0.0009819451131439577, "loss": 0.1724, "theoretical_loss": 4.856428465269159, "tokens_seen": 92012544 }, { "epoch": 0.03, "learning_rate": 0.0009818648692023753, "loss": 0.1731, "theoretical_loss": 4.8545022378441836, "tokens_seen": 92274688 }, { "epoch": 0.03, "learning_rate": 0.0009817846252607929, "loss": 0.1745, "theoretical_loss": 4.852583002178001, "tokens_seen": 92536832 }, { "epoch": 0.03, "learning_rate": 0.0009817043813192105, "loss": 0.1718, "theoretical_loss": 4.8506707131930344, "tokens_seen": 92798976 }, { "epoch": 0.03, "learning_rate": 0.000981624137377628, "loss": 0.1729, "theoretical_loss": 4.848765326228618, "tokens_seen": 93061120 }, { "epoch": 0.03, "learning_rate": 0.0009815438934360455, "loss": 0.1688, "theoretical_loss": 4.846866797035984, "tokens_seen": 93323264 }, { "epoch": 0.03, "learning_rate": 0.0009814636494944631, "loss": 0.1717, "theoretical_loss": 4.844975081773322, "tokens_seen": 93585408 }, { "epoch": 0.03, "learning_rate": 0.0009813834055528807, "loss": 0.1714, "theoretical_loss": 4.843090137000904, "tokens_seen": 93847552 }, { "epoch": 0.03, "learning_rate": 0.0009813031616112983, "loss": 0.1716, "theoretical_loss": 4.841211919676287, "tokens_seen": 94109696 }, { "epoch": 0.03, "learning_rate": 0.000981222917669716, "loss": 0.1673, "theoretical_loss": 4.839340387149586, "tokens_seen": 94371840 }, { "epoch": 0.03, "learning_rate": 0.0009811426737281336, "loss": 0.1707, "theoretical_loss": 4.837475497158817, "tokens_seen": 94633984 }, { "epoch": 0.03, "learning_rate": 0.0009810624297865512, "loss": 0.1687, "theoretical_loss": 4.835617207825303, "tokens_seen": 94896128 }, { "epoch": 0.03, "learning_rate": 0.0009809821858449686, "loss": 0.1669, "theoretical_loss": 4.83376547764915, "tokens_seen": 95158272 }, { "epoch": 0.03, "learning_rate": 0.0009809019419033864, "loss": 0.1717, "theoretical_loss": 4.831920265504792, "tokens_seen": 95420416 }, { "epoch": 0.03, "learning_rate": 0.0009808216979618038, "loss": 0.1723, "theoretical_loss": 4.830081530636594, "tokens_seen": 95682560 }, { "epoch": 0.03, "learning_rate": 0.0009807414540202216, "loss": 0.1717, "theoretical_loss": 4.8282492326545245, "tokens_seen": 95944704 }, { "epoch": 0.03, "learning_rate": 0.000980661210078639, "loss": 0.1714, "theoretical_loss": 4.826423331529884, "tokens_seen": 96206848 }, { "epoch": 0.03, "learning_rate": 0.0009805809661370566, "loss": 0.1687, "theoretical_loss": 4.824603787591102, "tokens_seen": 96468992 }, { "epoch": 0.03, "learning_rate": 0.0009805007221954742, "loss": 0.1724, "theoretical_loss": 4.822790561519591, "tokens_seen": 96731136 }, { "epoch": 0.03, "learning_rate": 0.0009804204782538919, "loss": 0.1713, "theoretical_loss": 4.8209836143456535, "tokens_seen": 96993280 }, { "epoch": 0.03, "learning_rate": 0.0009803402343123095, "loss": 0.167, "theoretical_loss": 4.81918290744446, "tokens_seen": 97255424 }, { "epoch": 0.03, "learning_rate": 0.000980259990370727, "loss": 0.1705, "theoretical_loss": 4.817388402532074, "tokens_seen": 97517568 }, { "epoch": 0.03, "learning_rate": 0.0009801797464291447, "loss": 0.176, "theoretical_loss": 4.815600061661536, "tokens_seen": 97779712 }, { "epoch": 0.03, "learning_rate": 0.000980099502487562, "loss": 0.17, "theoretical_loss": 4.813817847219008, "tokens_seen": 98041856 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.005961360409855843, "objective/train/docs_used": 43181, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.3599514961242676, "objective/train/original_loss": 3.3599514961242676, "objective/train/theoretical_loss": 4.812041721919962, "objective/train/tokens_used": 118764000, "objective/train/value_avg": -0.01464080810546875, "objective/train/value_loss": 0.0005292649148032069, "objective/train/value_max": -0.003040313720703125, "objective/train/value_min": -0.2239990234375, "objective/train/value_reward_corr": 0.26425426870008045, "objective/train/value_std": 0.0106048583984375, "objective/train/weight_avg": 1.0062075853347778, "objective/train/weighted_lm_loss": 3.379601001739502, "objective/train/weights_max": 1.1572779417037964, "objective/train/weights_min": 0.37088075280189514, "theoretical_loss": 4.812041721919962, "tokens_seen": 98304000 }, { "epoch": 0.03, "learning_rate": 0.00098001925854598, "loss": 0.165, "theoretical_loss": 4.812041721919962, "tokens_seen": 98304000 }, { "epoch": 0.03, "learning_rate": 0.0009799390146043973, "loss": 0.1663, "theoretical_loss": 4.810271648805427, "tokens_seen": 98566144 }, { "epoch": 0.03, "learning_rate": 0.000979858770662815, "loss": 0.17, "theoretical_loss": 4.8085075912383015, "tokens_seen": 98828288 }, { "epoch": 0.03, "learning_rate": 0.0009797785267212325, "loss": 0.1685, "theoretical_loss": 4.806749512899687, "tokens_seen": 99090432 }, { "epoch": 0.03, "learning_rate": 0.0009796982827796502, "loss": 0.1684, "theoretical_loss": 4.804997377785307, "tokens_seen": 99352576 }, { "epoch": 0.03, "learning_rate": 0.0009796180388380678, "loss": 0.1671, "theoretical_loss": 4.8032511502019535, "tokens_seen": 99614720 }, { "epoch": 0.03, "learning_rate": 0.0009795377948964854, "loss": 0.1671, "theoretical_loss": 4.801510794763988, "tokens_seen": 99876864 }, { "epoch": 0.03, "learning_rate": 0.000979457550954903, "loss": 0.1664, "theoretical_loss": 4.799776276389897, "tokens_seen": 100139008 }, { "epoch": 0.03, "learning_rate": 0.0009793773070133206, "loss": 0.1693, "theoretical_loss": 4.798047560298882, "tokens_seen": 100401152 }, { "epoch": 0.03, "learning_rate": 0.0009792970630717382, "loss": 0.1692, "theoretical_loss": 4.796324612007515, "tokens_seen": 100663296 }, { "epoch": 0.03, "learning_rate": 0.0009792168191301556, "loss": 0.1668, "theoretical_loss": 4.794607397326421, "tokens_seen": 100925440 }, { "epoch": 0.03, "learning_rate": 0.0009791365751885732, "loss": 0.1686, "theoretical_loss": 4.792895882357019, "tokens_seen": 101187584 }, { "epoch": 0.03, "learning_rate": 0.0009790563312469908, "loss": 0.1683, "theoretical_loss": 4.791190033488302, "tokens_seen": 101449728 }, { "epoch": 0.03, "learning_rate": 0.0009789760873054085, "loss": 0.1653, "theoretical_loss": 4.7894898173936635, "tokens_seen": 101711872 }, { "epoch": 0.03, "learning_rate": 0.000978895843363826, "loss": 0.1643, "theoretical_loss": 4.787795201027757, "tokens_seen": 101974016 }, { "epoch": 0.03, "learning_rate": 0.0009788155994222437, "loss": 0.1674, "theoretical_loss": 4.786106151623423, "tokens_seen": 102236160 }, { "epoch": 0.03, "learning_rate": 0.0009787353554806613, "loss": 0.1617, "theoretical_loss": 4.784422636688622, "tokens_seen": 102498304 }, { "epoch": 0.03, "learning_rate": 0.000978655111539079, "loss": 0.1637, "theoretical_loss": 4.782744624003442, "tokens_seen": 102760448 }, { "epoch": 0.03, "learning_rate": 0.0009785748675974963, "loss": 0.1669, "theoretical_loss": 4.781072081617127, "tokens_seen": 103022592 }, { "epoch": 0.03, "learning_rate": 0.000978494623655914, "loss": 0.1658, "theoretical_loss": 4.779404977845148, "tokens_seen": 103284736 }, { "epoch": 0.03, "learning_rate": 0.0009784143797143315, "loss": 0.1642, "theoretical_loss": 4.777743281266321, "tokens_seen": 103546880 }, { "epoch": 0.03, "learning_rate": 0.0009783341357727491, "loss": 0.1616, "theoretical_loss": 4.776086960719956, "tokens_seen": 103809024 }, { "epoch": 0.03, "learning_rate": 0.0009782538918311668, "loss": 0.1677, "theoretical_loss": 4.774435985303043, "tokens_seen": 104071168 }, { "epoch": 0.03, "learning_rate": 0.0009781736478895844, "loss": 0.1606, "theoretical_loss": 4.772790324367482, "tokens_seen": 104333312 }, { "epoch": 0.03, "learning_rate": 0.000978093403948002, "loss": 0.1655, "theoretical_loss": 4.771149947517346, "tokens_seen": 104595456 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.005723345559090376, "objective/train/docs_used": 45444, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.301978349685669, "objective/train/original_loss": 3.3019778728485107, "objective/train/theoretical_loss": 4.769514824606178, "objective/train/tokens_used": 125317600, "objective/train/value_avg": -0.01215362548828125, "objective/train/value_loss": 0.000492334773298353, "objective/train/value_max": -0.0025310516357421875, "objective/train/value_min": -0.21240234375, "objective/train/value_reward_corr": 0.35604829265860916, "objective/train/value_std": 0.007457733154296875, "objective/train/weight_avg": 1.005944848060608, "objective/train/weighted_lm_loss": 3.3237457275390625, "objective/train/weights_max": 1.124742031097412, "objective/train/weights_min": 0.3697309195995331, "theoretical_loss": 4.769514824606178, "tokens_seen": 104857600 }, { "epoch": 0.03, "learning_rate": 0.0009780131600064196, "loss": 0.1616, "theoretical_loss": 4.769514824606178, "tokens_seen": 104857600 }, { "epoch": 0.03, "learning_rate": 0.0009779329160648372, "loss": 0.1593, "theoretical_loss": 4.7678849257343305, "tokens_seen": 105119744 }, { "epoch": 0.03, "learning_rate": 0.0009778526721232546, "loss": 0.1619, "theoretical_loss": 4.766260221246329, "tokens_seen": 105381888 }, { "epoch": 0.03, "learning_rate": 0.0009777724281816724, "loss": 0.1613, "theoretical_loss": 4.764640681728281, "tokens_seen": 105644032 }, { "epoch": 0.03, "learning_rate": 0.0009776921842400898, "loss": 0.1637, "theoretical_loss": 4.763026278005314, "tokens_seen": 105906176 }, { "epoch": 0.03, "learning_rate": 0.0009776119402985074, "loss": 0.1604, "theoretical_loss": 4.761416981139046, "tokens_seen": 106168320 }, { "epoch": 0.03, "learning_rate": 0.000977531696356925, "loss": 0.1622, "theoretical_loss": 4.75981276242509, "tokens_seen": 106430464 }, { "epoch": 0.03, "learning_rate": 0.0009774514524153427, "loss": 0.1616, "theoretical_loss": 4.758213593390595, "tokens_seen": 106692608 }, { "epoch": 0.03, "learning_rate": 0.0009773712084737603, "loss": 0.1605, "theoretical_loss": 4.756619445791808, "tokens_seen": 106954752 }, { "epoch": 0.03, "learning_rate": 0.0009772909645321779, "loss": 0.1627, "theoretical_loss": 4.755030291611678, "tokens_seen": 107216896 }, { "epoch": 0.03, "learning_rate": 0.0009772107205905955, "loss": 0.1632, "theoretical_loss": 4.753446103057492, "tokens_seen": 107479040 }, { "epoch": 0.03, "learning_rate": 0.000977130476649013, "loss": 0.1618, "theoretical_loss": 4.751866852558529, "tokens_seen": 107741184 }, { "epoch": 0.03, "learning_rate": 0.0009770502327074307, "loss": 0.1627, "theoretical_loss": 4.7502925127637585, "tokens_seen": 108003328 }, { "epoch": 0.03, "learning_rate": 0.0009769699887658481, "loss": 0.162, "theoretical_loss": 4.7487230565395535, "tokens_seen": 108265472 }, { "epoch": 0.03, "learning_rate": 0.0009768897448242657, "loss": 0.1609, "theoretical_loss": 4.747158456967452, "tokens_seen": 108527616 }, { "epoch": 0.03, "learning_rate": 0.0009768095008826833, "loss": 0.1614, "theoretical_loss": 4.745598687341927, "tokens_seen": 108789760 }, { "epoch": 0.03, "learning_rate": 0.000976729256941101, "loss": 0.1622, "theoretical_loss": 4.744043721168196, "tokens_seen": 109051904 }, { "epoch": 0.03, "learning_rate": 0.0009766490129995186, "loss": 0.1593, "theoretical_loss": 4.74249353216006, "tokens_seen": 109314048 }, { "epoch": 0.03, "learning_rate": 0.0009765687690579362, "loss": 0.1589, "theoretical_loss": 4.740948094237761, "tokens_seen": 109576192 }, { "epoch": 0.03, "learning_rate": 0.0009764885251163538, "loss": 0.1604, "theoretical_loss": 4.739407381525874, "tokens_seen": 109838336 }, { "epoch": 0.03, "learning_rate": 0.0009764082811747713, "loss": 0.1601, "theoretical_loss": 4.7378713683512235, "tokens_seen": 110100480 }, { "epoch": 0.03, "learning_rate": 0.0009763280372331889, "loss": 0.1592, "theoretical_loss": 4.7363400292408215, "tokens_seen": 110362624 }, { "epoch": 0.03, "learning_rate": 0.0009762477932916065, "loss": 0.1597, "theoretical_loss": 4.734813338919842, "tokens_seen": 110624768 }, { "epoch": 0.03, "learning_rate": 0.000976167549350024, "loss": 0.1574, "theoretical_loss": 4.733291272309609, "tokens_seen": 110886912 }, { "epoch": 0.03, "learning_rate": 0.0009760873054084417, "loss": 0.1573, "theoretical_loss": 4.731773804525616, "tokens_seen": 111149056 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.005369019228965044, "objective/train/docs_used": 47788, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.1628293991088867, "objective/train/original_loss": 3.1628289222717285, "objective/train/theoretical_loss": 4.73026091087557, "objective/train/tokens_used": 131871200, "objective/train/value_avg": -0.0128021240234375, "objective/train/value_loss": 0.0003724124107975513, "objective/train/value_max": -0.0025615692138671875, "objective/train/value_min": -0.206787109375, "objective/train/value_reward_corr": 0.46127366019409083, "objective/train/value_std": 0.007617950439453125, "objective/train/weight_avg": 1.0055452585220337, "objective/train/weighted_lm_loss": 3.180939197540283, "objective/train/weights_max": 1.0874791145324707, "objective/train/weights_min": 0.3774433732032776, "theoretical_loss": 4.73026091087557, "tokens_seen": 111411200 }, { "epoch": 0.03, "learning_rate": 0.0009760070614668593, "loss": 0.1562, "theoretical_loss": 4.73026091087557, "tokens_seen": 111411200 }, { "epoch": 0.03, "learning_rate": 0.0009759268175252769, "loss": 0.1578, "theoretical_loss": 4.728752566857459, "tokens_seen": 111673344 }, { "epoch": 0.03, "learning_rate": 0.0009758465735836945, "loss": 0.1577, "theoretical_loss": 4.72724874815764, "tokens_seen": 111935488 }, { "epoch": 0.03, "learning_rate": 0.0009757663296421121, "loss": 0.1575, "theoretical_loss": 4.725749430648958, "tokens_seen": 112197632 }, { "epoch": 0.03, "learning_rate": 0.0009756860857005296, "loss": 0.1584, "theoretical_loss": 4.724254590388881, "tokens_seen": 112459776 }, { "epoch": 0.03, "learning_rate": 0.0009756058417589472, "loss": 0.1563, "theoretical_loss": 4.722764203617663, "tokens_seen": 112721920 }, { "epoch": 0.03, "learning_rate": 0.0009755255978173648, "loss": 0.1581, "theoretical_loss": 4.7212782467565235, "tokens_seen": 112984064 }, { "epoch": 0.03, "learning_rate": 0.0009754453538757823, "loss": 0.1575, "theoretical_loss": 4.719796696405858, "tokens_seen": 113246208 }, { "epoch": 0.03, "learning_rate": 0.0009753651099342, "loss": 0.1553, "theoretical_loss": 4.718319529343462, "tokens_seen": 113508352 }, { "epoch": 0.03, "learning_rate": 0.0009752848659926175, "loss": 0.1576, "theoretical_loss": 4.716846722522781, "tokens_seen": 113770496 }, { "epoch": 0.03, "learning_rate": 0.0009752046220510353, "loss": 0.1545, "theoretical_loss": 4.715378253071181, "tokens_seen": 114032640 }, { "epoch": 0.03, "learning_rate": 0.0009751243781094528, "loss": 0.1534, "theoretical_loss": 4.713914098288242, "tokens_seen": 114294784 }, { "epoch": 0.03, "learning_rate": 0.0009750441341678703, "loss": 0.1551, "theoretical_loss": 4.712454235644064, "tokens_seen": 114556928 }, { "epoch": 0.03, "learning_rate": 0.000974963890226288, "loss": 0.155, "theoretical_loss": 4.710998642777606, "tokens_seen": 114819072 }, { "epoch": 0.03, "learning_rate": 0.0009748836462847055, "loss": 0.1589, "theoretical_loss": 4.709547297495034, "tokens_seen": 115081216 }, { "epoch": 0.03, "learning_rate": 0.0009748034023431231, "loss": 0.1516, "theoretical_loss": 4.708100177768094, "tokens_seen": 115343360 }, { "epoch": 0.04, "learning_rate": 0.0009747231584015407, "loss": 0.155, "theoretical_loss": 4.7066572617325075, "tokens_seen": 115605504 }, { "epoch": 0.04, "learning_rate": 0.0009746429144599583, "loss": 0.1575, "theoretical_loss": 4.705218527686375, "tokens_seen": 115867648 }, { "epoch": 0.04, "learning_rate": 0.0009745626705183758, "loss": 0.1577, "theoretical_loss": 4.703783954088612, "tokens_seen": 116129792 }, { "epoch": 0.04, "learning_rate": 0.0009744824265767935, "loss": 0.1576, "theoretical_loss": 4.702353519557398, "tokens_seen": 116391936 }, { "epoch": 0.04, "learning_rate": 0.0009744021826352111, "loss": 0.1537, "theoretical_loss": 4.700927202868639, "tokens_seen": 116654080 }, { "epoch": 0.04, "learning_rate": 0.0009743219386936286, "loss": 0.1527, "theoretical_loss": 4.699504982954452, "tokens_seen": 116916224 }, { "epoch": 0.04, "learning_rate": 0.0009742416947520463, "loss": 0.1534, "theoretical_loss": 4.698086838901676, "tokens_seen": 117178368 }, { "epoch": 0.04, "learning_rate": 0.0009741614508104638, "loss": 0.1519, "theoretical_loss": 4.696672749950385, "tokens_seen": 117440512 }, { "epoch": 0.04, "learning_rate": 0.0009740812068688814, "loss": 0.1517, "theoretical_loss": 4.695262695492428, "tokens_seen": 117702656 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.006793029140681028, "objective/train/docs_used": 50256, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.074131488800049, "objective/train/original_loss": 3.0741310119628906, "objective/train/theoretical_loss": 4.693856655069986, "objective/train/tokens_used": 138424800, "objective/train/value_avg": -0.0142974853515625, "objective/train/value_loss": 0.000627185101620853, "objective/train/value_max": -0.00250244140625, "objective/train/value_min": -0.256591796875, "objective/train/value_reward_corr": 0.2673630119001533, "objective/train/value_std": 0.0104522705078125, "objective/train/weight_avg": 1.0070631504058838, "objective/train/weighted_lm_loss": 3.0982470512390137, "objective/train/weights_max": 1.1906453371047974, "objective/train/weights_min": 0.2276872992515564, "theoretical_loss": 4.693856655069986, "tokens_seen": 117964800 }, { "epoch": 0.04, "learning_rate": 0.000974000962927299, "loss": 0.1531, "theoretical_loss": 4.693856655069986, "tokens_seen": 117964800 }, { "epoch": 0.04, "learning_rate": 0.0009739207189857165, "loss": 0.1542, "theoretical_loss": 4.692454608374145, "tokens_seen": 118226944 }, { "epoch": 0.04, "learning_rate": 0.0009738404750441343, "loss": 0.1509, "theoretical_loss": 4.6910565352434785, "tokens_seen": 118489088 }, { "epoch": 0.04, "learning_rate": 0.0009737602311025518, "loss": 0.1512, "theoretical_loss": 4.689662415662662, "tokens_seen": 118751232 }, { "epoch": 0.04, "learning_rate": 0.0009736799871609694, "loss": 0.1504, "theoretical_loss": 4.688272229761087, "tokens_seen": 119013376 }, { "epoch": 0.04, "learning_rate": 0.000973599743219387, "loss": 0.1515, "theoretical_loss": 4.686885957811503, "tokens_seen": 119275520 }, { "epoch": 0.04, "learning_rate": 0.0009735194992778046, "loss": 0.147, "theoretical_loss": 4.685503580228671, "tokens_seen": 119537664 }, { "epoch": 0.04, "learning_rate": 0.0009734392553362221, "loss": 0.1475, "theoretical_loss": 4.684125077568028, "tokens_seen": 119799808 }, { "epoch": 0.04, "learning_rate": 0.0009733590113946397, "loss": 0.1524, "theoretical_loss": 4.682750430524376, "tokens_seen": 120061952 }, { "epoch": 0.04, "learning_rate": 0.0009732787674530573, "loss": 0.1518, "theoretical_loss": 4.6813796199305795, "tokens_seen": 120324096 }, { "epoch": 0.04, "learning_rate": 0.0009731985235114748, "loss": 0.151, "theoretical_loss": 4.68001262675628, "tokens_seen": 120586240 }, { "epoch": 0.04, "learning_rate": 0.0009731182795698925, "loss": 0.1465, "theoretical_loss": 4.678649432106627, "tokens_seen": 120848384 }, { "epoch": 0.04, "learning_rate": 0.00097303803562831, "loss": 0.1496, "theoretical_loss": 4.677290017221017, "tokens_seen": 121110528 }, { "epoch": 0.04, "learning_rate": 0.0009729577916867277, "loss": 0.1461, "theoretical_loss": 4.675934363471857, "tokens_seen": 121372672 }, { "epoch": 0.04, "learning_rate": 0.0009728775477451453, "loss": 0.1474, "theoretical_loss": 4.674582452363334, "tokens_seen": 121634816 }, { "epoch": 0.04, "learning_rate": 0.0009727973038035629, "loss": 0.1463, "theoretical_loss": 4.673234265530201, "tokens_seen": 121896960 }, { "epoch": 0.04, "learning_rate": 0.0009727170598619804, "loss": 0.1463, "theoretical_loss": 4.671889784736576, "tokens_seen": 122159104 }, { "epoch": 0.04, "learning_rate": 0.000972636815920398, "loss": 0.1478, "theoretical_loss": 4.670548991874758, "tokens_seen": 122421248 }, { "epoch": 0.04, "learning_rate": 0.0009725565719788156, "loss": 0.1463, "theoretical_loss": 4.669211868964052, "tokens_seen": 122683392 }, { "epoch": 0.04, "learning_rate": 0.0009724763280372332, "loss": 0.1496, "theoretical_loss": 4.66787839814961, "tokens_seen": 122945536 }, { "epoch": 0.04, "learning_rate": 0.0009723960840956508, "loss": 0.1477, "theoretical_loss": 4.666548561701285, "tokens_seen": 123207680 }, { "epoch": 0.04, "learning_rate": 0.0009723158401540683, "loss": 0.1463, "theoretical_loss": 4.665222342012491, "tokens_seen": 123469824 }, { "epoch": 0.04, "learning_rate": 0.0009722355962124861, "loss": 0.1472, "theoretical_loss": 4.663899721599093, "tokens_seen": 123731968 }, { "epoch": 0.04, "learning_rate": 0.0009721553522709036, "loss": 0.1446, "theoretical_loss": 4.6625806830982865, "tokens_seen": 123994112 }, { "epoch": 0.04, "learning_rate": 0.0009720751083293211, "loss": 0.1434, "theoretical_loss": 4.661265209267507, "tokens_seen": 124256256 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.006344511639326811, "objective/train/docs_used": 52572, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.8837485313415527, "objective/train/original_loss": 2.8837485313415527, "objective/train/theoretical_loss": 4.659953282983348, "objective/train/tokens_used": 144978400, "objective/train/value_avg": -0.011322021484375, "objective/train/value_loss": 0.0003046532510779798, "objective/train/value_max": -0.001964569091796875, "objective/train/value_min": -0.193603515625, "objective/train/value_reward_corr": 0.3217214219555613, "objective/train/value_std": 0.007747650146484375, "objective/train/weight_avg": 1.0064884424209595, "objective/train/weighted_lm_loss": 2.9043095111846924, "objective/train/weights_max": 1.1398353576660156, "objective/train/weights_min": 0.37116101384162903, "theoretical_loss": 4.659953282983348, "tokens_seen": 124518400 }, { "epoch": 0.04, "learning_rate": 0.0009719948643877388, "loss": 0.1463, "theoretical_loss": 4.659953282983348, "tokens_seen": 124518400 }, { "epoch": 0.04, "learning_rate": 0.0009719146204461563, "loss": 0.1482, "theoretical_loss": 4.658644887240481, "tokens_seen": 124780544 }, { "epoch": 0.04, "learning_rate": 0.0009718343765045739, "loss": 0.147, "theoretical_loss": 4.657340005150602, "tokens_seen": 125042688 }, { "epoch": 0.04, "learning_rate": 0.0009717541325629915, "loss": 0.1462, "theoretical_loss": 4.656038619941382, "tokens_seen": 125304832 }, { "epoch": 0.04, "learning_rate": 0.0009716738886214091, "loss": 0.1434, "theoretical_loss": 4.654740714955429, "tokens_seen": 125566976 }, { "epoch": 0.04, "learning_rate": 0.0009715936446798266, "loss": 0.1433, "theoretical_loss": 4.653446273649259, "tokens_seen": 125829120 }, { "epoch": 0.04, "learning_rate": 0.0009715134007382443, "loss": 0.1455, "theoretical_loss": 4.652155279592286, "tokens_seen": 126091264 }, { "epoch": 0.04, "learning_rate": 0.0009714331567966619, "loss": 0.1429, "theoretical_loss": 4.650867716465819, "tokens_seen": 126353408 }, { "epoch": 0.04, "learning_rate": 0.0009713529128550794, "loss": 0.1428, "theoretical_loss": 4.6495835680620665, "tokens_seen": 126615552 }, { "epoch": 0.04, "learning_rate": 0.0009712726689134971, "loss": 0.1406, "theoretical_loss": 4.648302818283158, "tokens_seen": 126877696 }, { "epoch": 0.04, "learning_rate": 0.0009711924249719146, "loss": 0.1459, "theoretical_loss": 4.64702545114017, "tokens_seen": 127139840 }, { "epoch": 0.04, "learning_rate": 0.0009711121810303323, "loss": 0.1425, "theoretical_loss": 4.645751450752172, "tokens_seen": 127401984 }, { "epoch": 0.04, "learning_rate": 0.0009710319370887498, "loss": 0.1386, "theoretical_loss": 4.644480801345268, "tokens_seen": 127664128 }, { "epoch": 0.04, "learning_rate": 0.0009709516931471673, "loss": 0.1435, "theoretical_loss": 4.643213487251664, "tokens_seen": 127926272 }, { "epoch": 0.04, "learning_rate": 0.000970871449205585, "loss": 0.1397, "theoretical_loss": 4.641949492908737, "tokens_seen": 128188416 }, { "epoch": 0.04, "learning_rate": 0.0009707912052640026, "loss": 0.1406, "theoretical_loss": 4.640688802858113, "tokens_seen": 128450560 }, { "epoch": 0.04, "learning_rate": 0.0009707109613224202, "loss": 0.1436, "theoretical_loss": 4.63943140174476, "tokens_seen": 128712704 }, { "epoch": 0.04, "learning_rate": 0.0009706307173808378, "loss": 0.1405, "theoretical_loss": 4.6381772743160905, "tokens_seen": 128974848 }, { "epoch": 0.04, "learning_rate": 0.0009705504734392554, "loss": 0.1423, "theoretical_loss": 4.636926405421065, "tokens_seen": 129236992 }, { "epoch": 0.04, "learning_rate": 0.0009704702294976729, "loss": 0.1387, "theoretical_loss": 4.635678780009318, "tokens_seen": 129499136 }, { "epoch": 0.04, "learning_rate": 0.0009703899855560905, "loss": 0.1392, "theoretical_loss": 4.634434383130284, "tokens_seen": 129761280 }, { "epoch": 0.04, "learning_rate": 0.0009703097416145081, "loss": 0.1422, "theoretical_loss": 4.633193199932336, "tokens_seen": 130023424 }, { "epoch": 0.04, "learning_rate": 0.0009702294976729256, "loss": 0.1425, "theoretical_loss": 4.631955215661932, "tokens_seen": 130285568 }, { "epoch": 0.04, "learning_rate": 0.0009701492537313433, "loss": 0.1425, "theoretical_loss": 4.630720415662774, "tokens_seen": 130547712 }, { "epoch": 0.04, "learning_rate": 0.0009700690097897609, "loss": 0.1353, "theoretical_loss": 4.629488785374969, "tokens_seen": 130809856 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.005258428864181042, "objective/train/docs_used": 54897, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.703356981277466, "objective/train/original_loss": 2.7033567428588867, "objective/train/theoretical_loss": 4.628260310334209, "objective/train/tokens_used": 151532000, "objective/train/value_avg": -0.01247406005859375, "objective/train/value_loss": 0.0005161120789125562, "objective/train/value_max": -0.0016937255859375, "objective/train/value_min": -0.182861328125, "objective/train/value_reward_corr": 0.283905402927882, "objective/train/value_std": 0.01013946533203125, "objective/train/weight_avg": 1.0054808855056763, "objective/train/weighted_lm_loss": 2.7186498641967773, "objective/train/weights_max": 1.149869441986084, "objective/train/weights_min": 0.2252407968044281, "theoretical_loss": 4.628260310334209, "tokens_seen": 131072000 }, { "epoch": 0.04, "learning_rate": 0.0009699887658481786, "loss": 0.1388, "theoretical_loss": 4.628260310334209, "tokens_seen": 131072000 }, { "epoch": 0.04, "learning_rate": 0.0009699085219065961, "loss": 0.1394, "theoretical_loss": 4.6270349761709495, "tokens_seen": 131334144 }, { "epoch": 0.04, "learning_rate": 0.0009698282779650137, "loss": 0.1354, "theoretical_loss": 4.625812768609601, "tokens_seen": 131596288 }, { "epoch": 0.04, "learning_rate": 0.0009697480340234313, "loss": 0.1392, "theoretical_loss": 4.62459367346773, "tokens_seen": 131858432 }, { "epoch": 0.04, "learning_rate": 0.0009696677900818488, "loss": 0.1383, "theoretical_loss": 4.623377676655271, "tokens_seen": 132120576 }, { "epoch": 0.04, "learning_rate": 0.0009695875461402664, "loss": 0.1387, "theoretical_loss": 4.622164764173735, "tokens_seen": 132382720 }, { "epoch": 0.04, "learning_rate": 0.000969507302198684, "loss": 0.1389, "theoretical_loss": 4.62095492211544, "tokens_seen": 132644864 }, { "epoch": 0.04, "learning_rate": 0.0009694270582571016, "loss": 0.1438, "theoretical_loss": 4.6197481366627455, "tokens_seen": 132907008 }, { "epoch": 0.04, "learning_rate": 0.0009693468143155191, "loss": 0.1385, "theoretical_loss": 4.618544394087287, "tokens_seen": 133169152 }, { "epoch": 0.04, "learning_rate": 0.0009692665703739369, "loss": 0.1327, "theoretical_loss": 4.617343680749233, "tokens_seen": 133431296 }, { "epoch": 0.04, "learning_rate": 0.0009691863264323544, "loss": 0.1351, "theoretical_loss": 4.61614598309653, "tokens_seen": 133693440 }, { "epoch": 0.04, "learning_rate": 0.0009691060824907719, "loss": 0.1364, "theoretical_loss": 4.614951287664179, "tokens_seen": 133955584 }, { "epoch": 0.04, "learning_rate": 0.0009690258385491896, "loss": 0.1374, "theoretical_loss": 4.613759581073502, "tokens_seen": 134217728 }, { "epoch": 0.04, "learning_rate": 0.0009689455946076071, "loss": 0.1366, "theoretical_loss": 4.612570850031418, "tokens_seen": 134479872 }, { "epoch": 0.04, "learning_rate": 0.0009688653506660247, "loss": 0.1337, "theoretical_loss": 4.611385081329736, "tokens_seen": 134742016 }, { "epoch": 0.04, "learning_rate": 0.0009687851067244423, "loss": 0.1349, "theoretical_loss": 4.610202261844444, "tokens_seen": 135004160 }, { "epoch": 0.04, "learning_rate": 0.0009687048627828599, "loss": 0.1352, "theoretical_loss": 4.6090223785350135, "tokens_seen": 135266304 }, { "epoch": 0.04, "learning_rate": 0.0009686246188412776, "loss": 0.1329, "theoretical_loss": 4.607845418443706, "tokens_seen": 135528448 }, { "epoch": 0.04, "learning_rate": 0.0009685443748996951, "loss": 0.1354, "theoretical_loss": 4.606671368694888, "tokens_seen": 135790592 }, { "epoch": 0.04, "learning_rate": 0.0009684641309581127, "loss": 0.1341, "theoretical_loss": 4.6055002164943595, "tokens_seen": 136052736 }, { "epoch": 0.04, "learning_rate": 0.0009683838870165303, "loss": 0.1295, "theoretical_loss": 4.604331949128672, "tokens_seen": 136314880 }, { "epoch": 0.04, "learning_rate": 0.0009683036430749479, "loss": 0.134, "theoretical_loss": 4.603166553964474, "tokens_seen": 136577024 }, { "epoch": 0.04, "learning_rate": 0.0009682233991333654, "loss": 0.1367, "theoretical_loss": 4.60200401844785, "tokens_seen": 136839168 }, { "epoch": 0.04, "learning_rate": 0.0009681431551917831, "loss": 0.1308, "theoretical_loss": 4.6008443301036746, "tokens_seen": 137101312 }, { "epoch": 0.04, "learning_rate": 0.0009680629112502006, "loss": 0.1338, "theoretical_loss": 4.5996874765349585, "tokens_seen": 137363456 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.0035103908739984035, "objective/train/docs_used": 57180, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.7642529010772705, "objective/train/original_loss": 2.7642529010772705, "objective/train/theoretical_loss": 4.598533445422221, "objective/train/tokens_used": 158085600, "objective/train/value_avg": -0.01120758056640625, "objective/train/value_loss": 0.0006862494046799839, "objective/train/value_max": -0.0017547607421875, "objective/train/value_min": -0.2978515625, "objective/train/value_reward_corr": 0.42945186696590126, "objective/train/value_std": 0.00952911376953125, "objective/train/weight_avg": 1.0038180351257324, "objective/train/weighted_lm_loss": 2.776818037033081, "objective/train/weights_max": 1.2096216678619385, "objective/train/weights_min": 0.36997780203819275, "theoretical_loss": 4.598533445422221, "tokens_seen": 137625600 }, { "epoch": 0.04, "learning_rate": 0.0009679826673086181, "loss": 0.1328, "theoretical_loss": 4.598533445422221, "tokens_seen": 137625600 }, { "epoch": 0.04, "learning_rate": 0.0009679024233670359, "loss": 0.1312, "theoretical_loss": 4.597382224522855, "tokens_seen": 137887744 }, { "epoch": 0.04, "learning_rate": 0.0009678221794254534, "loss": 0.1327, "theoretical_loss": 4.596233801670502, "tokens_seen": 138149888 }, { "epoch": 0.04, "learning_rate": 0.000967741935483871, "loss": 0.1315, "theoretical_loss": 4.595088164774435, "tokens_seen": 138412032 }, { "epoch": 0.04, "learning_rate": 0.0009676616915422886, "loss": 0.134, "theoretical_loss": 4.593945301818941, "tokens_seen": 138674176 }, { "epoch": 0.04, "learning_rate": 0.0009675814476007062, "loss": 0.1287, "theoretical_loss": 4.592805200862726, "tokens_seen": 138936320 }, { "epoch": 0.04, "learning_rate": 0.0009675012036591238, "loss": 0.1307, "theoretical_loss": 4.591667850038302, "tokens_seen": 139198464 }, { "epoch": 0.04, "learning_rate": 0.0009674209597175413, "loss": 0.13, "theoretical_loss": 4.590533237551401, "tokens_seen": 139460608 }, { "epoch": 0.04, "learning_rate": 0.0009673407157759589, "loss": 0.1303, "theoretical_loss": 4.589401351680385, "tokens_seen": 139722752 }, { "epoch": 0.04, "learning_rate": 0.0009672604718343765, "loss": 0.1324, "theoretical_loss": 4.588272180775659, "tokens_seen": 139984896 }, { "epoch": 0.04, "learning_rate": 0.0009671802278927941, "loss": 0.1279, "theoretical_loss": 4.587145713259102, "tokens_seen": 140247040 }, { "epoch": 0.04, "learning_rate": 0.0009670999839512117, "loss": 0.1287, "theoretical_loss": 4.5860219376234905, "tokens_seen": 140509184 }, { "epoch": 0.04, "learning_rate": 0.0009670197400096294, "loss": 0.1311, "theoretical_loss": 4.584900842431934, "tokens_seen": 140771328 }, { "epoch": 0.04, "learning_rate": 0.0009669394960680469, "loss": 0.1271, "theoretical_loss": 4.583782416317316, "tokens_seen": 141033472 }, { "epoch": 0.04, "learning_rate": 0.0009668592521264644, "loss": 0.1287, "theoretical_loss": 4.582666647981739, "tokens_seen": 141295616 }, { "epoch": 0.04, "learning_rate": 0.0009667790081848821, "loss": 0.1268, "theoretical_loss": 4.581553526195974, "tokens_seen": 141557760 }, { "epoch": 0.04, "learning_rate": 0.0009666987642432996, "loss": 0.1293, "theoretical_loss": 4.580443039798922, "tokens_seen": 141819904 }, { "epoch": 0.04, "learning_rate": 0.0009666185203017172, "loss": 0.1296, "theoretical_loss": 4.57933517769707, "tokens_seen": 142082048 }, { "epoch": 0.04, "learning_rate": 0.0009665382763601348, "loss": 0.1308, "theoretical_loss": 4.578229928863959, "tokens_seen": 142344192 }, { "epoch": 0.04, "learning_rate": 0.0009664580324185524, "loss": 0.1281, "theoretical_loss": 4.57712728233966, "tokens_seen": 142606336 }, { "epoch": 0.04, "learning_rate": 0.00096637778847697, "loss": 0.1267, "theoretical_loss": 4.576027227230245, "tokens_seen": 142868480 }, { "epoch": 0.04, "learning_rate": 0.0009662975445353877, "loss": 0.1325, "theoretical_loss": 4.574929752707274, "tokens_seen": 143130624 }, { "epoch": 0.04, "learning_rate": 0.0009662173005938052, "loss": 0.1283, "theoretical_loss": 4.573834848007284, "tokens_seen": 143392768 }, { "epoch": 0.04, "learning_rate": 0.0009661370566522228, "loss": 0.127, "theoretical_loss": 4.572742502431272, "tokens_seen": 143654912 }, { "epoch": 0.04, "learning_rate": 0.0009660568127106404, "loss": 0.1264, "theoretical_loss": 4.571652705344202, "tokens_seen": 143917056 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.0026565194129943848, "objective/train/docs_used": 59583, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.5324413776397705, "objective/train/original_loss": 2.5324416160583496, "objective/train/theoretical_loss": 4.570565446174504, "objective/train/tokens_used": 164639200, "objective/train/value_avg": -0.01026153564453125, "objective/train/value_loss": 0.0005388536374084651, "objective/train/value_max": -0.0011034011840820312, "objective/train/value_min": -0.2158203125, "objective/train/value_reward_corr": 0.41669318991791116, "objective/train/value_std": 0.0081024169921875, "objective/train/weight_avg": 1.0029032230377197, "objective/train/weighted_lm_loss": 2.539125442504883, "objective/train/weights_max": 1.1576155424118042, "objective/train/weights_min": 0.37208810448646545, "theoretical_loss": 4.570565446174504, "tokens_seen": 144179200 }, { "epoch": 0.04, "learning_rate": 0.0009659765687690579, "loss": 0.127, "theoretical_loss": 4.570565446174504, "tokens_seen": 144179200 }, { "epoch": 0.04, "learning_rate": 0.0009658963248274756, "loss": 0.1256, "theoretical_loss": 4.569480714413578, "tokens_seen": 144441344 }, { "epoch": 0.04, "learning_rate": 0.0009658160808858931, "loss": 0.1276, "theoretical_loss": 4.568398499615305, "tokens_seen": 144703488 }, { "epoch": 0.04, "learning_rate": 0.0009657358369443107, "loss": 0.1274, "theoretical_loss": 4.56731879139557, "tokens_seen": 144965632 }, { "epoch": 0.04, "learning_rate": 0.0009656555930027284, "loss": 0.1258, "theoretical_loss": 4.566241579431776, "tokens_seen": 145227776 }, { "epoch": 0.04, "learning_rate": 0.0009655753490611459, "loss": 0.1274, "theoretical_loss": 4.565166853462371, "tokens_seen": 145489920 }, { "epoch": 0.04, "learning_rate": 0.0009654951051195635, "loss": 0.1289, "theoretical_loss": 4.564094603286375, "tokens_seen": 145752064 }, { "epoch": 0.04, "learning_rate": 0.0009654148611779811, "loss": 0.1239, "theoretical_loss": 4.5630248187629245, "tokens_seen": 146014208 }, { "epoch": 0.04, "learning_rate": 0.0009653346172363987, "loss": 0.1252, "theoretical_loss": 4.561957489810798, "tokens_seen": 146276352 }, { "epoch": 0.04, "learning_rate": 0.0009652543732948162, "loss": 0.1234, "theoretical_loss": 4.5608926064079665, "tokens_seen": 146538496 }, { "epoch": 0.04, "learning_rate": 0.0009651741293532339, "loss": 0.127, "theoretical_loss": 4.559830158591139, "tokens_seen": 146800640 }, { "epoch": 0.04, "learning_rate": 0.0009650938854116514, "loss": 0.1215, "theoretical_loss": 4.558770136455316, "tokens_seen": 147062784 }, { "epoch": 0.04, "learning_rate": 0.0009650136414700689, "loss": 0.1245, "theoretical_loss": 4.557712530153342, "tokens_seen": 147324928 }, { "epoch": 0.04, "learning_rate": 0.0009649333975284866, "loss": 0.1217, "theoretical_loss": 4.556657329895469, "tokens_seen": 147587072 }, { "epoch": 0.04, "learning_rate": 0.0009648531535869042, "loss": 0.125, "theoretical_loss": 4.5556045259489135, "tokens_seen": 147849216 }, { "epoch": 0.04, "learning_rate": 0.0009647729096453219, "loss": 0.1258, "theoretical_loss": 4.554554108637437, "tokens_seen": 148111360 }, { "epoch": 0.04, "learning_rate": 0.0009646926657037394, "loss": 0.1232, "theoretical_loss": 4.553506068340907, "tokens_seen": 148373504 }, { "epoch": 0.05, "learning_rate": 0.000964612421762157, "loss": 0.1224, "theoretical_loss": 4.552460395494878, "tokens_seen": 148635648 }, { "epoch": 0.05, "learning_rate": 0.0009645321778205746, "loss": 0.1258, "theoretical_loss": 4.55141708059017, "tokens_seen": 148897792 }, { "epoch": 0.05, "learning_rate": 0.0009644519338789921, "loss": 0.1209, "theoretical_loss": 4.5503761141724555, "tokens_seen": 149159936 }, { "epoch": 0.05, "learning_rate": 0.0009643716899374097, "loss": 0.1227, "theoretical_loss": 4.549337486841843, "tokens_seen": 149422080 }, { "epoch": 0.05, "learning_rate": 0.0009642914459958273, "loss": 0.1231, "theoretical_loss": 4.548301189252473, "tokens_seen": 149684224 }, { "epoch": 0.05, "learning_rate": 0.000964211202054245, "loss": 0.1237, "theoretical_loss": 4.54726721211211, "tokens_seen": 149946368 }, { "epoch": 0.05, "learning_rate": 0.0009641309581126625, "loss": 0.1212, "theoretical_loss": 4.546235546181743, "tokens_seen": 150208512 }, { "epoch": 0.05, "learning_rate": 0.0009640507141710802, "loss": 0.118, "theoretical_loss": 4.545206182275189, "tokens_seen": 150470656 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.002808451419696212, "objective/train/docs_used": 61752, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.444796085357666, "objective/train/original_loss": 2.444795846939087, "objective/train/theoretical_loss": 4.5441791112587016, "objective/train/tokens_used": 171192800, "objective/train/value_avg": -0.01094818115234375, "objective/train/value_loss": 0.0004207800084259361, "objective/train/value_max": -0.0013093948364257812, "objective/train/value_min": -0.256103515625, "objective/train/value_reward_corr": 0.4595803265485007, "objective/train/value_std": 0.00933074951171875, "objective/train/weight_avg": 1.0030062198638916, "objective/train/weighted_lm_loss": 2.4522807598114014, "objective/train/weights_max": 1.231403112411499, "objective/train/weights_min": 0.3714272677898407, "theoretical_loss": 4.5441791112587016, "tokens_seen": 150732800 }, { "epoch": 0.05, "learning_rate": 0.0009639704702294977, "loss": 0.1196, "theoretical_loss": 4.5441791112587016, "tokens_seen": 150732800 }, { "epoch": 0.05, "learning_rate": 0.0009638902262879152, "loss": 0.1256, "theoretical_loss": 4.5431543240505725, "tokens_seen": 150994944 }, { "epoch": 0.05, "learning_rate": 0.0009638099823463329, "loss": 0.123, "theoretical_loss": 4.5421318116207585, "tokens_seen": 151257088 }, { "epoch": 0.05, "learning_rate": 0.0009637297384047504, "loss": 0.1184, "theoretical_loss": 4.541111564990485, "tokens_seen": 151519232 }, { "epoch": 0.05, "learning_rate": 0.0009636494944631681, "loss": 0.1206, "theoretical_loss": 4.540093575231879, "tokens_seen": 151781376 }, { "epoch": 0.05, "learning_rate": 0.0009635692505215856, "loss": 0.119, "theoretical_loss": 4.539077833467582, "tokens_seen": 152043520 }, { "epoch": 0.05, "learning_rate": 0.0009634890065800032, "loss": 0.1207, "theoretical_loss": 4.538064330870389, "tokens_seen": 152305664 }, { "epoch": 0.05, "learning_rate": 0.0009634087626384209, "loss": 0.1227, "theoretical_loss": 4.537053058662869, "tokens_seen": 152567808 }, { "epoch": 0.05, "learning_rate": 0.0009633285186968385, "loss": 0.1211, "theoretical_loss": 4.536044008117005, "tokens_seen": 152829952 }, { "epoch": 0.05, "learning_rate": 0.000963248274755256, "loss": 0.1239, "theoretical_loss": 4.535037170553833, "tokens_seen": 153092096 }, { "epoch": 0.05, "learning_rate": 0.0009631680308136736, "loss": 0.1197, "theoretical_loss": 4.534032537343078, "tokens_seen": 153354240 }, { "epoch": 0.05, "learning_rate": 0.0009630877868720912, "loss": 0.1227, "theoretical_loss": 4.533030099902803, "tokens_seen": 153616384 }, { "epoch": 0.05, "learning_rate": 0.0009630075429305087, "loss": 0.1232, "theoretical_loss": 4.53202984969905, "tokens_seen": 153878528 }, { "epoch": 0.05, "learning_rate": 0.0009629272989889264, "loss": 0.1246, "theoretical_loss": 4.531031778245499, "tokens_seen": 154140672 }, { "epoch": 0.05, "learning_rate": 0.0009628470550473439, "loss": 0.1214, "theoretical_loss": 4.530035877103115, "tokens_seen": 154402816 }, { "epoch": 0.05, "learning_rate": 0.0009627668111057615, "loss": 0.121, "theoretical_loss": 4.529042137879809, "tokens_seen": 154664960 }, { "epoch": 0.05, "learning_rate": 0.0009626865671641792, "loss": 0.1184, "theoretical_loss": 4.528050552230092, "tokens_seen": 154927104 }, { "epoch": 0.05, "learning_rate": 0.0009626063232225967, "loss": 0.1205, "theoretical_loss": 4.527061111854746, "tokens_seen": 155189248 }, { "epoch": 0.05, "learning_rate": 0.0009625260792810143, "loss": 0.1217, "theoretical_loss": 4.526073808500481, "tokens_seen": 155451392 }, { "epoch": 0.05, "learning_rate": 0.0009624458353394319, "loss": 0.1181, "theoretical_loss": 4.525088633959613, "tokens_seen": 155713536 }, { "epoch": 0.05, "learning_rate": 0.0009623655913978495, "loss": 0.1176, "theoretical_loss": 4.524105580069728, "tokens_seen": 155975680 }, { "epoch": 0.05, "learning_rate": 0.0009622853474562671, "loss": 0.1178, "theoretical_loss": 4.523124638713361, "tokens_seen": 156237824 }, { "epoch": 0.05, "learning_rate": 0.0009622051035146847, "loss": 0.1206, "theoretical_loss": 4.522145801817673, "tokens_seen": 156499968 }, { "epoch": 0.05, "learning_rate": 0.0009621248595731022, "loss": 0.1165, "theoretical_loss": 4.521169061354129, "tokens_seen": 156762112 }, { "epoch": 0.05, "learning_rate": 0.0009620446156315198, "loss": 0.1179, "theoretical_loss": 4.520194409338185, "tokens_seen": 157024256 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.002041465835645795, "objective/train/docs_used": 64097, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.572587728500366, "objective/train/original_loss": 2.572587728500366, "objective/train/theoretical_loss": 4.519221837828971, "objective/train/tokens_used": 177746400, "objective/train/value_avg": -0.00862884521484375, "objective/train/value_loss": 0.00036377867218106985, "objective/train/value_max": -0.0010986328125, "objective/train/value_min": -0.177490234375, "objective/train/value_reward_corr": 0.3439090032549563, "objective/train/value_std": 0.005954742431640625, "objective/train/weight_avg": 1.002203106880188, "objective/train/weighted_lm_loss": 2.5783188343048096, "objective/train/weights_max": 1.1067639589309692, "objective/train/weights_min": 0.22429333627223969, "theoretical_loss": 4.519221837828971, "tokens_seen": 157286400 }, { "epoch": 0.05, "learning_rate": 0.0009619643716899374, "loss": 0.1204, "theoretical_loss": 4.519221837828971, "tokens_seen": 157286400 }, { "epoch": 0.05, "learning_rate": 0.000961884127748355, "loss": 0.1203, "theoretical_loss": 4.51825133892898, "tokens_seen": 157548544 }, { "epoch": 0.05, "learning_rate": 0.0009618038838067727, "loss": 0.1186, "theoretical_loss": 4.517282904783764, "tokens_seen": 157810688 }, { "epoch": 0.05, "learning_rate": 0.0009617236398651902, "loss": 0.1215, "theoretical_loss": 4.516316527581621, "tokens_seen": 158072832 }, { "epoch": 0.05, "learning_rate": 0.0009616433959236078, "loss": 0.1222, "theoretical_loss": 4.515352199553295, "tokens_seen": 158334976 }, { "epoch": 0.05, "learning_rate": 0.0009615631519820254, "loss": 0.1203, "theoretical_loss": 4.514389912971679, "tokens_seen": 158597120 }, { "epoch": 0.05, "learning_rate": 0.0009614829080404429, "loss": 0.1202, "theoretical_loss": 4.513429660151513, "tokens_seen": 158859264 }, { "epoch": 0.05, "learning_rate": 0.0009614026640988605, "loss": 0.1169, "theoretical_loss": 4.51247143344909, "tokens_seen": 159121408 }, { "epoch": 0.05, "learning_rate": 0.0009613224201572781, "loss": 0.1199, "theoretical_loss": 4.511515225261961, "tokens_seen": 159383552 }, { "epoch": 0.05, "learning_rate": 0.0009612421762156957, "loss": 0.1184, "theoretical_loss": 4.5105610280286506, "tokens_seen": 159645696 }, { "epoch": 0.05, "learning_rate": 0.0009611619322741134, "loss": 0.1197, "theoretical_loss": 4.509608834228365, "tokens_seen": 159907840 }, { "epoch": 0.05, "learning_rate": 0.000961081688332531, "loss": 0.1157, "theoretical_loss": 4.508658636380705, "tokens_seen": 160169984 }, { "epoch": 0.05, "learning_rate": 0.0009610014443909485, "loss": 0.1206, "theoretical_loss": 4.507710427045389, "tokens_seen": 160432128 }, { "epoch": 0.05, "learning_rate": 0.0009609212004493661, "loss": 0.1179, "theoretical_loss": 4.50676419882197, "tokens_seen": 160694272 }, { "epoch": 0.05, "learning_rate": 0.0009608409565077837, "loss": 0.1181, "theoretical_loss": 4.505819944349556, "tokens_seen": 160956416 }, { "epoch": 0.05, "learning_rate": 0.0009607607125662012, "loss": 0.1152, "theoretical_loss": 4.504877656306535, "tokens_seen": 161218560 }, { "epoch": 0.05, "learning_rate": 0.0009606804686246189, "loss": 0.1187, "theoretical_loss": 4.503937327410306, "tokens_seen": 161480704 }, { "epoch": 0.05, "learning_rate": 0.0009606002246830364, "loss": 0.1185, "theoretical_loss": 4.502998950417004, "tokens_seen": 161742848 }, { "epoch": 0.05, "learning_rate": 0.000960519980741454, "loss": 0.1186, "theoretical_loss": 4.502062518121232, "tokens_seen": 162004992 }, { "epoch": 0.05, "learning_rate": 0.0009604397367998717, "loss": 0.1161, "theoretical_loss": 4.501128023355796, "tokens_seen": 162267136 }, { "epoch": 0.05, "learning_rate": 0.0009603594928582893, "loss": 0.116, "theoretical_loss": 4.500195458991443, "tokens_seen": 162529280 }, { "epoch": 0.05, "learning_rate": 0.0009602792489167068, "loss": 0.1189, "theoretical_loss": 4.499264817936593, "tokens_seen": 162791424 }, { "epoch": 0.05, "learning_rate": 0.0009601990049751244, "loss": 0.1202, "theoretical_loss": 4.498336093137089, "tokens_seen": 163053568 }, { "epoch": 0.05, "learning_rate": 0.000960118761033542, "loss": 0.1179, "theoretical_loss": 4.49740927757593, "tokens_seen": 163315712 }, { "epoch": 0.05, "learning_rate": 0.0009600385170919595, "loss": 0.1162, "theoretical_loss": 4.496484364273021, "tokens_seen": 163577856 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.004073688294738531, "objective/train/docs_used": 66569, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.1198720932006836, "objective/train/original_loss": 2.1198720932006836, "objective/train/theoretical_loss": 4.495561346284925, "objective/train/tokens_used": 184300000, "objective/train/value_avg": -0.009857177734375, "objective/train/value_loss": 0.00025354541139677167, "objective/train/value_max": -0.0008263587951660156, "objective/train/value_min": -0.55126953125, "objective/train/value_reward_corr": 0.4736301748253403, "objective/train/value_std": 0.00994110107421875, "objective/train/weight_avg": 1.004190444946289, "objective/train/weighted_lm_loss": 2.12888765335083, "objective/train/weights_max": 1.1655560731887817, "objective/train/weights_min": 0.36955323815345764, "theoretical_loss": 4.495561346284925, "tokens_seen": 163840000 }, { "epoch": 0.05, "learning_rate": 0.0009599582731503772, "loss": 0.1154, "theoretical_loss": 4.495561346284925, "tokens_seen": 163840000 }, { "epoch": 0.05, "learning_rate": 0.0009598780292087947, "loss": 0.117, "theoretical_loss": 4.494640216704598, "tokens_seen": 164102144 }, { "epoch": 0.05, "learning_rate": 0.0009597977852672124, "loss": 0.1187, "theoretical_loss": 4.493720968661158, "tokens_seen": 164364288 }, { "epoch": 0.05, "learning_rate": 0.00095971754132563, "loss": 0.1155, "theoretical_loss": 4.492803595319623, "tokens_seen": 164626432 }, { "epoch": 0.05, "learning_rate": 0.0009596372973840475, "loss": 0.1145, "theoretical_loss": 4.49188808988068, "tokens_seen": 164888576 }, { "epoch": 0.05, "learning_rate": 0.0009595570534424652, "loss": 0.1111, "theoretical_loss": 4.490974445580429, "tokens_seen": 165150720 }, { "epoch": 0.05, "learning_rate": 0.0009594768095008827, "loss": 0.1124, "theoretical_loss": 4.490062655690153, "tokens_seen": 165412864 }, { "epoch": 0.05, "learning_rate": 0.0009593965655593003, "loss": 0.1125, "theoretical_loss": 4.489152713516077, "tokens_seen": 165675008 }, { "epoch": 0.05, "learning_rate": 0.0009593163216177179, "loss": 0.1103, "theoretical_loss": 4.488244612399129, "tokens_seen": 165937152 }, { "epoch": 0.05, "learning_rate": 0.0009592360776761355, "loss": 0.1118, "theoretical_loss": 4.487338345714707, "tokens_seen": 166199296 }, { "epoch": 0.05, "learning_rate": 0.000959155833734553, "loss": 0.1136, "theoretical_loss": 4.486433906872448, "tokens_seen": 166461440 }, { "epoch": 0.05, "learning_rate": 0.0009590755897929706, "loss": 0.1148, "theoretical_loss": 4.485531289315997, "tokens_seen": 166723584 }, { "epoch": 0.05, "learning_rate": 0.0009589953458513882, "loss": 0.1092, "theoretical_loss": 4.484630486522775, "tokens_seen": 166985728 }, { "epoch": 0.05, "learning_rate": 0.0009589151019098058, "loss": 0.1164, "theoretical_loss": 4.483731492003757, "tokens_seen": 167247872 }, { "epoch": 0.05, "learning_rate": 0.0009588348579682235, "loss": 0.1126, "theoretical_loss": 4.482834299303246, "tokens_seen": 167510016 }, { "epoch": 0.05, "learning_rate": 0.000958754614026641, "loss": 0.1125, "theoretical_loss": 4.481938901998647, "tokens_seen": 167772160 }, { "epoch": 0.05, "learning_rate": 0.0009586743700850587, "loss": 0.1122, "theoretical_loss": 4.481045293700248, "tokens_seen": 168034304 }, { "epoch": 0.05, "learning_rate": 0.0009585941261434762, "loss": 0.1135, "theoretical_loss": 4.480153468051001, "tokens_seen": 168296448 }, { "epoch": 0.05, "learning_rate": 0.0009585138822018937, "loss": 0.1153, "theoretical_loss": 4.4792634187263065, "tokens_seen": 168558592 }, { "epoch": 0.05, "learning_rate": 0.0009584336382603114, "loss": 0.1143, "theoretical_loss": 4.4783751394337905, "tokens_seen": 168820736 }, { "epoch": 0.05, "learning_rate": 0.0009583533943187289, "loss": 0.1091, "theoretical_loss": 4.4774886239131, "tokens_seen": 169082880 }, { "epoch": 0.05, "learning_rate": 0.0009582731503771465, "loss": 0.116, "theoretical_loss": 4.476603865935683, "tokens_seen": 169345024 }, { "epoch": 0.05, "learning_rate": 0.0009581929064355642, "loss": 0.1161, "theoretical_loss": 4.475720859304583, "tokens_seen": 169607168 }, { "epoch": 0.05, "learning_rate": 0.0009581126624939818, "loss": 0.1111, "theoretical_loss": 4.474839597854226, "tokens_seen": 169869312 }, { "epoch": 0.05, "learning_rate": 0.0009580324185523993, "loss": 0.1142, "theoretical_loss": 4.473960075450218, "tokens_seen": 170131456 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.004443011246621609, "objective/train/docs_used": 69022, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.34761905670166, "objective/train/original_loss": 2.34761905670166, "objective/train/theoretical_loss": 4.473082285989134, "objective/train/tokens_used": 190853600, "objective/train/value_avg": -0.009521484375, "objective/train/value_loss": 0.00034313887590542436, "objective/train/value_max": -0.001094818115234375, "objective/train/value_min": -0.2257080078125, "objective/train/value_reward_corr": 0.3206139523753071, "objective/train/value_std": 0.00771331787109375, "objective/train/weight_avg": 1.0045922994613647, "objective/train/weighted_lm_loss": 2.357492446899414, "objective/train/weights_max": 1.1233699321746826, "objective/train/weights_min": 0.23052240908145905, "theoretical_loss": 4.473082285989134, "tokens_seen": 170393600 }, { "epoch": 0.05, "learning_rate": 0.0009579521746108169, "loss": 0.1172, "theoretical_loss": 4.473082285989134, "tokens_seen": 170393600 }, { "epoch": 0.05, "learning_rate": 0.0009578719306692345, "loss": 0.114, "theoretical_loss": 4.472206223398325, "tokens_seen": 170655744 }, { "epoch": 0.05, "learning_rate": 0.000957791686727652, "loss": 0.1133, "theoretical_loss": 4.471331881635698, "tokens_seen": 170917888 }, { "epoch": 0.05, "learning_rate": 0.0009577114427860697, "loss": 0.1127, "theoretical_loss": 4.470459254689533, "tokens_seen": 171180032 }, { "epoch": 0.05, "learning_rate": 0.0009576311988444872, "loss": 0.1129, "theoretical_loss": 4.469588336578277, "tokens_seen": 171442176 }, { "epoch": 0.05, "learning_rate": 0.0009575509549029048, "loss": 0.1119, "theoretical_loss": 4.468719121350343, "tokens_seen": 171704320 }, { "epoch": 0.05, "learning_rate": 0.0009574707109613225, "loss": 0.1102, "theoretical_loss": 4.467851603083923, "tokens_seen": 171966464 }, { "epoch": 0.05, "learning_rate": 0.00095739046701974, "loss": 0.1137, "theoretical_loss": 4.466985775886784, "tokens_seen": 172228608 }, { "epoch": 0.05, "learning_rate": 0.0009573102230781577, "loss": 0.1119, "theoretical_loss": 4.466121633896087, "tokens_seen": 172490752 }, { "epoch": 0.05, "learning_rate": 0.0009572299791365752, "loss": 0.1152, "theoretical_loss": 4.465259171278182, "tokens_seen": 172752896 }, { "epoch": 0.05, "learning_rate": 0.0009571497351949928, "loss": 0.1116, "theoretical_loss": 4.464398382228435, "tokens_seen": 173015040 }, { "epoch": 0.05, "learning_rate": 0.0009570694912534104, "loss": 0.1146, "theoretical_loss": 4.463539260971023, "tokens_seen": 173277184 }, { "epoch": 0.05, "learning_rate": 0.000956989247311828, "loss": 0.1148, "theoretical_loss": 4.462681801758762, "tokens_seen": 173539328 }, { "epoch": 0.05, "learning_rate": 0.0009569090033702455, "loss": 0.1142, "theoretical_loss": 4.461825998872914, "tokens_seen": 173801472 }, { "epoch": 0.05, "learning_rate": 0.0009568287594286632, "loss": 0.1076, "theoretical_loss": 4.460971846623005, "tokens_seen": 174063616 }, { "epoch": 0.05, "learning_rate": 0.0009567485154870808, "loss": 0.1106, "theoretical_loss": 4.460119339346643, "tokens_seen": 174325760 }, { "epoch": 0.05, "learning_rate": 0.0009566682715454983, "loss": 0.1086, "theoretical_loss": 4.45926847140934, "tokens_seen": 174587904 }, { "epoch": 0.05, "learning_rate": 0.000956588027603916, "loss": 0.1096, "theoretical_loss": 4.45841923720433, "tokens_seen": 174850048 }, { "epoch": 0.05, "learning_rate": 0.0009565077836623335, "loss": 0.1163, "theoretical_loss": 4.4575716311523905, "tokens_seen": 175112192 }, { "epoch": 0.05, "learning_rate": 0.0009564275397207511, "loss": 0.1118, "theoretical_loss": 4.456725647701669, "tokens_seen": 175374336 }, { "epoch": 0.05, "learning_rate": 0.0009563472957791687, "loss": 0.1112, "theoretical_loss": 4.455881281327508, "tokens_seen": 175636480 }, { "epoch": 0.05, "learning_rate": 0.0009562670518375863, "loss": 0.1138, "theoretical_loss": 4.4550385265322685, "tokens_seen": 175898624 }, { "epoch": 0.05, "learning_rate": 0.0009561868078960038, "loss": 0.1107, "theoretical_loss": 4.45419737784516, "tokens_seen": 176160768 }, { "epoch": 0.05, "learning_rate": 0.0009561065639544214, "loss": 0.113, "theoretical_loss": 4.45335782982207, "tokens_seen": 176422912 }, { "epoch": 0.05, "learning_rate": 0.000956026320012839, "loss": 0.1134, "theoretical_loss": 4.452519877045393, "tokens_seen": 176685056 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.004205191973596811, "objective/train/docs_used": 71338, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.1437151432037354, "objective/train/original_loss": 2.1437151432037354, "objective/train/theoretical_loss": 4.451683514123864, "objective/train/tokens_used": 197407200, "objective/train/value_avg": -0.009735107421875, "objective/train/value_loss": 0.00020696282444987446, "objective/train/value_max": -0.0012941360473632812, "objective/train/value_min": -0.50732421875, "objective/train/value_reward_corr": 0.42861167714862647, "objective/train/value_std": 0.00954437255859375, "objective/train/weight_avg": 1.0043038129806519, "objective/train/weighted_lm_loss": 2.1534533500671387, "objective/train/weights_max": 1.4412035942077637, "objective/train/weights_min": 0.3694094717502594, "theoretical_loss": 4.451683514123864, "tokens_seen": 176947200 }, { "epoch": 0.05, "learning_rate": 0.0009559460760712567, "loss": 0.1134, "theoretical_loss": 4.451683514123864, "tokens_seen": 176947200 }, { "epoch": 0.05, "learning_rate": 0.0009558658321296743, "loss": 0.1131, "theoretical_loss": 4.450848735692391, "tokens_seen": 177209344 }, { "epoch": 0.05, "learning_rate": 0.0009557855881880918, "loss": 0.1129, "theoretical_loss": 4.450015536411886, "tokens_seen": 177471488 }, { "epoch": 0.05, "learning_rate": 0.0009557053442465095, "loss": 0.1109, "theoretical_loss": 4.449183910969108, "tokens_seen": 177733632 }, { "epoch": 0.05, "learning_rate": 0.000955625100304927, "loss": 0.1089, "theoretical_loss": 4.448353854076494, "tokens_seen": 177995776 }, { "epoch": 0.05, "learning_rate": 0.0009555448563633445, "loss": 0.1123, "theoretical_loss": 4.4475253604719995, "tokens_seen": 178257920 }, { "epoch": 0.05, "learning_rate": 0.0009554646124217622, "loss": 0.1079, "theoretical_loss": 4.446698424918937, "tokens_seen": 178520064 }, { "epoch": 0.05, "learning_rate": 0.0009553843684801797, "loss": 0.1115, "theoretical_loss": 4.44587304220582, "tokens_seen": 178782208 }, { "epoch": 0.05, "learning_rate": 0.0009553041245385973, "loss": 0.1076, "theoretical_loss": 4.4450492071462, "tokens_seen": 179044352 }, { "epoch": 0.05, "learning_rate": 0.000955223880597015, "loss": 0.1146, "theoretical_loss": 4.444226914578513, "tokens_seen": 179306496 }, { "epoch": 0.05, "learning_rate": 0.0009551436366554326, "loss": 0.1136, "theoretical_loss": 4.4434061593659235, "tokens_seen": 179568640 }, { "epoch": 0.05, "learning_rate": 0.0009550633927138501, "loss": 0.1129, "theoretical_loss": 4.442586936396171, "tokens_seen": 179830784 }, { "epoch": 0.05, "learning_rate": 0.0009549831487722677, "loss": 0.1116, "theoretical_loss": 4.441769240581412, "tokens_seen": 180092928 }, { "epoch": 0.05, "learning_rate": 0.0009549029048306853, "loss": 0.1162, "theoretical_loss": 4.440953066858077, "tokens_seen": 180355072 }, { "epoch": 0.05, "learning_rate": 0.0009548226608891028, "loss": 0.1104, "theoretical_loss": 4.4401384101867105, "tokens_seen": 180617216 }, { "epoch": 0.05, "learning_rate": 0.0009547424169475205, "loss": 0.1108, "theoretical_loss": 4.439325265551826, "tokens_seen": 180879360 }, { "epoch": 0.05, "learning_rate": 0.000954662173005938, "loss": 0.1117, "theoretical_loss": 4.438513627961757, "tokens_seen": 181141504 }, { "epoch": 0.05, "learning_rate": 0.0009545819290643557, "loss": 0.1118, "theoretical_loss": 4.437703492448509, "tokens_seen": 181403648 }, { "epoch": 0.06, "learning_rate": 0.0009545016851227733, "loss": 0.1139, "theoretical_loss": 4.436894854067614, "tokens_seen": 181665792 }, { "epoch": 0.06, "learning_rate": 0.0009544214411811908, "loss": 0.1126, "theoretical_loss": 4.436087707897984, "tokens_seen": 181927936 }, { "epoch": 0.06, "learning_rate": 0.0009543411972396085, "loss": 0.1126, "theoretical_loss": 4.435282049041769, "tokens_seen": 182190080 }, { "epoch": 0.06, "learning_rate": 0.000954260953298026, "loss": 0.1106, "theoretical_loss": 4.434477872624212, "tokens_seen": 182452224 }, { "epoch": 0.06, "learning_rate": 0.0009541807093564436, "loss": 0.1133, "theoretical_loss": 4.433675173793507, "tokens_seen": 182714368 }, { "epoch": 0.06, "learning_rate": 0.0009541004654148612, "loss": 0.1116, "theoretical_loss": 4.43287394772066, "tokens_seen": 182976512 }, { "epoch": 0.06, "learning_rate": 0.0009540202214732788, "loss": 0.1147, "theoretical_loss": 4.43207418959935, "tokens_seen": 183238656 }, { "epoch": 0.06, "objective/train/advantage_avg": -0.000987537787295878, "objective/train/docs_used": 73723, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.2004032135009766, "objective/train/original_loss": 2.2004029750823975, "objective/train/theoretical_loss": 4.431275894645784, "objective/train/tokens_used": 203960800, "objective/train/value_avg": -0.00931549072265625, "objective/train/value_loss": 0.001105409930460155, "objective/train/value_max": -0.0009927749633789062, "objective/train/value_min": -0.28662109375, "objective/train/value_reward_corr": 0.2604573666222557, "objective/train/value_std": 0.00833892822265625, "objective/train/weight_avg": 0.9995287656784058, "objective/train/weighted_lm_loss": 2.196560859680176, "objective/train/weights_max": 1.1982022523880005, "objective/train/weights_min": 0.36976757645606995, "theoretical_loss": 4.431275894645784, "tokens_seen": 183500800 }, { "epoch": 0.06, "learning_rate": 0.0009539399775316963, "loss": 0.1075, "theoretical_loss": 4.431275894645784, "tokens_seen": 183500800 }, { "epoch": 0.06, "learning_rate": 0.000953859733590114, "loss": 0.1111, "theoretical_loss": 4.43047905809857, "tokens_seen": 183762944 }, { "epoch": 0.06, "learning_rate": 0.0009537794896485315, "loss": 0.1096, "theoretical_loss": 4.42968367521857, "tokens_seen": 184025088 }, { "epoch": 0.06, "learning_rate": 0.000953699245706949, "loss": 0.1113, "theoretical_loss": 4.428889741288771, "tokens_seen": 184287232 }, { "epoch": 0.06, "learning_rate": 0.0009536190017653668, "loss": 0.1123, "theoretical_loss": 4.428097251614145, "tokens_seen": 184549376 }, { "epoch": 0.06, "learning_rate": 0.0009535387578237843, "loss": 0.1104, "theoretical_loss": 4.427306201521524, "tokens_seen": 184811520 }, { "epoch": 0.06, "learning_rate": 0.000953458513882202, "loss": 0.1098, "theoretical_loss": 4.426516586359458, "tokens_seen": 185073664 }, { "epoch": 0.06, "learning_rate": 0.0009533782699406195, "loss": 0.1119, "theoretical_loss": 4.425728401498089, "tokens_seen": 185335808 }, { "epoch": 0.06, "learning_rate": 0.0009532980259990371, "loss": 0.1123, "theoretical_loss": 4.424941642329019, "tokens_seen": 185597952 }, { "epoch": 0.06, "learning_rate": 0.0009532177820574547, "loss": 0.1109, "theoretical_loss": 4.42415630426518, "tokens_seen": 185860096 }, { "epoch": 0.06, "learning_rate": 0.0009531375381158722, "loss": 0.1093, "theoretical_loss": 4.423372382740707, "tokens_seen": 186122240 }, { "epoch": 0.06, "learning_rate": 0.0009530572941742898, "loss": 0.1064, "theoretical_loss": 4.422589873210806, "tokens_seen": 186384384 }, { "epoch": 0.06, "learning_rate": 0.0009529770502327075, "loss": 0.1071, "theoretical_loss": 4.4218087711516345, "tokens_seen": 186646528 }, { "epoch": 0.06, "learning_rate": 0.0009528968062911251, "loss": 0.1123, "theoretical_loss": 4.421029072060167, "tokens_seen": 186908672 }, { "epoch": 0.06, "learning_rate": 0.0009528165623495426, "loss": 0.1095, "theoretical_loss": 4.420250771454078, "tokens_seen": 187170816 }, { "epoch": 0.06, "learning_rate": 0.0009527363184079603, "loss": 0.1135, "theoretical_loss": 4.419473864871613, "tokens_seen": 187432960 }, { "epoch": 0.06, "learning_rate": 0.0009526560744663778, "loss": 0.1138, "theoretical_loss": 4.4186983478714685, "tokens_seen": 187695104 }, { "epoch": 0.06, "learning_rate": 0.0009525758305247953, "loss": 0.1132, "theoretical_loss": 4.417924216032667, "tokens_seen": 187957248 }, { "epoch": 0.06, "learning_rate": 0.000952495586583213, "loss": 0.1117, "theoretical_loss": 4.417151464954437, "tokens_seen": 188219392 }, { "epoch": 0.06, "learning_rate": 0.0009524153426416305, "loss": 0.1107, "theoretical_loss": 4.416380090256095, "tokens_seen": 188481536 }, { "epoch": 0.06, "learning_rate": 0.0009523350987000481, "loss": 0.1148, "theoretical_loss": 4.415610087576923, "tokens_seen": 188743680 }, { "epoch": 0.06, "learning_rate": 0.0009522548547584658, "loss": 0.1119, "theoretical_loss": 4.414841452576049, "tokens_seen": 189005824 }, { "epoch": 0.06, "learning_rate": 0.0009521746108168834, "loss": 0.1084, "theoretical_loss": 4.414074180932333, "tokens_seen": 189267968 }, { "epoch": 0.06, "learning_rate": 0.000952094366875301, "loss": 0.1087, "theoretical_loss": 4.413308268344249, "tokens_seen": 189530112 }, { "epoch": 0.06, "learning_rate": 0.0009520141229337185, "loss": 0.1113, "theoretical_loss": 4.412543710529766, "tokens_seen": 189792256 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.002621724735945463, "objective/train/docs_used": 76077, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.243046760559082, "objective/train/original_loss": 2.243046760559082, "objective/train/theoretical_loss": 4.411780503226238, "objective/train/tokens_used": 210514400, "objective/train/value_avg": -0.00859832763671875, "objective/train/value_loss": 0.0002450415340717882, "objective/train/value_max": -0.0006189346313476562, "objective/train/value_min": -0.332275390625, "objective/train/value_reward_corr": 0.4247518874973535, "objective/train/value_std": 0.008056640625, "objective/train/weight_avg": 1.002734899520874, "objective/train/weighted_lm_loss": 2.2489123344421387, "objective/train/weights_max": 1.2309222221374512, "objective/train/weights_min": 0.3706021308898926, "theoretical_loss": 4.411780503226238, "tokens_seen": 190054400 }, { "epoch": 0.06, "learning_rate": 0.0009519338789921361, "loss": 0.1102, "theoretical_loss": 4.411780503226238, "tokens_seen": 190054400 }, { "epoch": 0.06, "learning_rate": 0.0009518536350505537, "loss": 0.1125, "theoretical_loss": 4.4110186421902835, "tokens_seen": 190316544 }, { "epoch": 0.06, "learning_rate": 0.0009517733911089713, "loss": 0.1074, "theoretical_loss": 4.4102581231976785, "tokens_seen": 190578688 }, { "epoch": 0.06, "learning_rate": 0.0009516931471673888, "loss": 0.1119, "theoretical_loss": 4.409498942043237, "tokens_seen": 190840832 }, { "epoch": 0.06, "learning_rate": 0.0009516129032258065, "loss": 0.1106, "theoretical_loss": 4.408741094540707, "tokens_seen": 191102976 }, { "epoch": 0.06, "learning_rate": 0.000951532659284224, "loss": 0.108, "theoretical_loss": 4.407984576522653, "tokens_seen": 191365120 }, { "epoch": 0.06, "learning_rate": 0.0009514524153426416, "loss": 0.1106, "theoretical_loss": 4.407229383840347, "tokens_seen": 191627264 }, { "epoch": 0.06, "learning_rate": 0.0009513721714010593, "loss": 0.1092, "theoretical_loss": 4.406475512363663, "tokens_seen": 191889408 }, { "epoch": 0.06, "learning_rate": 0.0009512919274594768, "loss": 0.1056, "theoretical_loss": 4.405722957980962, "tokens_seen": 192151552 }, { "epoch": 0.06, "learning_rate": 0.0009512116835178944, "loss": 0.1093, "theoretical_loss": 4.40497171659899, "tokens_seen": 192413696 }, { "epoch": 0.06, "learning_rate": 0.000951131439576312, "loss": 0.1061, "theoretical_loss": 4.404221784142768, "tokens_seen": 192675840 }, { "epoch": 0.06, "learning_rate": 0.0009510511956347296, "loss": 0.1098, "theoretical_loss": 4.403473156555487, "tokens_seen": 192937984 }, { "epoch": 0.06, "learning_rate": 0.0009509709516931472, "loss": 0.107, "theoretical_loss": 4.402725829798397, "tokens_seen": 193200128 }, { "epoch": 0.06, "learning_rate": 0.0009508907077515648, "loss": 0.1066, "theoretical_loss": 4.4019797998507135, "tokens_seen": 193462272 }, { "epoch": 0.06, "learning_rate": 0.0009508104638099823, "loss": 0.1042, "theoretical_loss": 4.401235062709502, "tokens_seen": 193724416 }, { "epoch": 0.06, "learning_rate": 0.0009507302198684, "loss": 0.1081, "theoretical_loss": 4.400491614389582, "tokens_seen": 193986560 }, { "epoch": 0.06, "learning_rate": 0.0009506499759268176, "loss": 0.1048, "theoretical_loss": 4.3997494509234185, "tokens_seen": 194248704 }, { "epoch": 0.06, "learning_rate": 0.0009505697319852351, "loss": 0.1083, "theoretical_loss": 4.399008568361027, "tokens_seen": 194510848 }, { "epoch": 0.06, "learning_rate": 0.0009504894880436528, "loss": 0.1097, "theoretical_loss": 4.398268962769867, "tokens_seen": 194772992 }, { "epoch": 0.06, "learning_rate": 0.0009504092441020703, "loss": 0.1125, "theoretical_loss": 4.397530630234744, "tokens_seen": 195035136 }, { "epoch": 0.06, "learning_rate": 0.0009503290001604879, "loss": 0.1067, "theoretical_loss": 4.396793566857708, "tokens_seen": 195297280 }, { "epoch": 0.06, "learning_rate": 0.0009502487562189055, "loss": 0.1057, "theoretical_loss": 4.396057768757957, "tokens_seen": 195559424 }, { "epoch": 0.06, "learning_rate": 0.000950168512277323, "loss": 0.1076, "theoretical_loss": 4.395323232071737, "tokens_seen": 195821568 }, { "epoch": 0.06, "learning_rate": 0.0009500882683357406, "loss": 0.1075, "theoretical_loss": 4.394589952952247, "tokens_seen": 196083712 }, { "epoch": 0.06, "learning_rate": 0.0009500080243941583, "loss": 0.1034, "theoretical_loss": 4.393857927569534, "tokens_seen": 196345856 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.0034133398439735174, "objective/train/docs_used": 78313, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.0345728397369385, "objective/train/original_loss": 2.0345726013183594, "objective/train/theoretical_loss": 4.393127152110409, "objective/train/tokens_used": 217068000, "objective/train/value_avg": -0.01177978515625, "objective/train/value_loss": 0.00046790740452706814, "objective/train/value_max": -0.00142669677734375, "objective/train/value_min": -0.64306640625, "objective/train/value_reward_corr": 0.5403801085285318, "objective/train/value_std": 0.01461029052734375, "objective/train/weight_avg": 1.0036334991455078, "objective/train/weighted_lm_loss": 2.0406572818756104, "objective/train/weights_max": 1.7312231063842773, "objective/train/weights_min": 0.3701556921005249, "theoretical_loss": 4.393127152110409, "tokens_seen": 196608000 }, { "epoch": 0.06, "learning_rate": 0.0009499277804525759, "loss": 0.1063, "theoretical_loss": 4.393127152110409, "tokens_seen": 196608000 }, { "epoch": 0.06, "learning_rate": 0.0009498475365109934, "loss": 0.1069, "theoretical_loss": 4.392397622778343, "tokens_seen": 196870144 }, { "epoch": 0.06, "learning_rate": 0.0009497672925694111, "loss": 0.1068, "theoretical_loss": 4.391669335793372, "tokens_seen": 197132288 }, { "epoch": 0.06, "learning_rate": 0.0009496870486278286, "loss": 0.1043, "theoretical_loss": 4.39094228739201, "tokens_seen": 197394432 }, { "epoch": 0.06, "learning_rate": 0.0009496068046862462, "loss": 0.1086, "theoretical_loss": 4.390216473827143, "tokens_seen": 197656576 }, { "epoch": 0.06, "learning_rate": 0.0009495265607446638, "loss": 0.1027, "theoretical_loss": 4.389491891367953, "tokens_seen": 197918720 }, { "epoch": 0.06, "learning_rate": 0.0009494463168030813, "loss": 0.1075, "theoretical_loss": 4.388768536299808, "tokens_seen": 198180864 }, { "epoch": 0.06, "learning_rate": 0.000949366072861499, "loss": 0.1096, "theoretical_loss": 4.388046404924184, "tokens_seen": 198443008 }, { "epoch": 0.06, "learning_rate": 0.0009492858289199166, "loss": 0.1058, "theoretical_loss": 4.387325493558566, "tokens_seen": 198705152 }, { "epoch": 0.06, "learning_rate": 0.0009492055849783342, "loss": 0.1068, "theoretical_loss": 4.386605798536362, "tokens_seen": 198967296 }, { "epoch": 0.06, "learning_rate": 0.0009491253410367518, "loss": 0.109, "theoretical_loss": 4.385887316206812, "tokens_seen": 199229440 }, { "epoch": 0.06, "learning_rate": 0.0009490450970951693, "loss": 0.1029, "theoretical_loss": 4.385170042934896, "tokens_seen": 199491584 }, { "epoch": 0.06, "learning_rate": 0.0009489648531535869, "loss": 0.1046, "theoretical_loss": 4.384453975101251, "tokens_seen": 199753728 }, { "epoch": 0.06, "learning_rate": 0.0009488846092120045, "loss": 0.107, "theoretical_loss": 4.38373910910208, "tokens_seen": 200015872 }, { "epoch": 0.06, "learning_rate": 0.0009488043652704221, "loss": 0.1056, "theoretical_loss": 4.383025441349063, "tokens_seen": 200278016 }, { "epoch": 0.06, "learning_rate": 0.0009487241213288396, "loss": 0.1099, "theoretical_loss": 4.382312968269276, "tokens_seen": 200540160 }, { "epoch": 0.06, "learning_rate": 0.0009486438773872573, "loss": 0.105, "theoretical_loss": 4.381601686305098, "tokens_seen": 200802304 }, { "epoch": 0.06, "learning_rate": 0.0009485636334456749, "loss": 0.1039, "theoretical_loss": 4.38089159191413, "tokens_seen": 201064448 }, { "epoch": 0.06, "learning_rate": 0.0009484833895040924, "loss": 0.1014, "theoretical_loss": 4.380182681569111, "tokens_seen": 201326592 }, { "epoch": 0.06, "learning_rate": 0.0009484031455625101, "loss": 0.1062, "theoretical_loss": 4.379474951757829, "tokens_seen": 201588736 }, { "epoch": 0.06, "learning_rate": 0.0009483229016209276, "loss": 0.107, "theoretical_loss": 4.378768398983042, "tokens_seen": 201850880 }, { "epoch": 0.06, "learning_rate": 0.0009482426576793453, "loss": 0.1042, "theoretical_loss": 4.378063019762392, "tokens_seen": 202113024 }, { "epoch": 0.06, "learning_rate": 0.0009481624137377628, "loss": 0.105, "theoretical_loss": 4.377358810628324, "tokens_seen": 202375168 }, { "epoch": 0.06, "learning_rate": 0.0009480821697961804, "loss": 0.1045, "theoretical_loss": 4.3766557681280025, "tokens_seen": 202637312 }, { "epoch": 0.06, "learning_rate": 0.000948001925854598, "loss": 0.1059, "theoretical_loss": 4.375953888823233, "tokens_seen": 202899456 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.0012575258733704686, "objective/train/docs_used": 80605, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.184474229812622, "objective/train/original_loss": 2.184473991394043, "objective/train/theoretical_loss": 4.375253169290376, "objective/train/tokens_used": 223621600, "objective/train/value_avg": -0.01114654541015625, "objective/train/value_loss": 0.000553421676158905, "objective/train/value_max": -0.0007014274597167969, "objective/train/value_min": -0.646484375, "objective/train/value_reward_corr": 0.5880496589440656, "objective/train/value_std": 0.0163116455078125, "objective/train/weight_avg": 1.0015053749084473, "objective/train/weighted_lm_loss": 2.1872315406799316, "objective/train/weights_max": 1.1879366636276245, "objective/train/weights_min": 0.3710477352142334, "theoretical_loss": 4.375253169290376, "tokens_seen": 203161600 }, { "epoch": 0.06, "learning_rate": 0.0009479216819130155, "loss": 0.1046, "theoretical_loss": 4.375253169290376, "tokens_seen": 203161600 }, { "epoch": 0.06, "learning_rate": 0.0009478414379714331, "loss": 0.1057, "theoretical_loss": 4.374553606120274, "tokens_seen": 203423744 }, { "epoch": 0.06, "learning_rate": 0.0009477611940298508, "loss": 0.1046, "theoretical_loss": 4.373855195918162, "tokens_seen": 203685888 }, { "epoch": 0.06, "learning_rate": 0.0009476809500882684, "loss": 0.1076, "theoretical_loss": 4.3731579353036, "tokens_seen": 203948032 }, { "epoch": 0.06, "learning_rate": 0.0009476007061466859, "loss": 0.1036, "theoretical_loss": 4.372461820910382, "tokens_seen": 204210176 }, { "epoch": 0.06, "learning_rate": 0.0009475204622051036, "loss": 0.1063, "theoretical_loss": 4.371766849386468, "tokens_seen": 204472320 }, { "epoch": 0.06, "learning_rate": 0.0009474402182635211, "loss": 0.1066, "theoretical_loss": 4.3710730173939005, "tokens_seen": 204734464 }, { "epoch": 0.06, "learning_rate": 0.0009473599743219387, "loss": 0.1061, "theoretical_loss": 4.370380321608731, "tokens_seen": 204996608 }, { "epoch": 0.06, "learning_rate": 0.0009472797303803563, "loss": 0.1077, "theoretical_loss": 4.369688758720937, "tokens_seen": 205258752 }, { "epoch": 0.06, "learning_rate": 0.0009471994864387738, "loss": 0.1037, "theoretical_loss": 4.368998325434355, "tokens_seen": 205520896 }, { "epoch": 0.06, "learning_rate": 0.0009471192424971916, "loss": 0.1023, "theoretical_loss": 4.3683090184666, "tokens_seen": 205783040 }, { "epoch": 0.06, "learning_rate": 0.0009470389985556091, "loss": 0.1015, "theoretical_loss": 4.367620834548987, "tokens_seen": 206045184 }, { "epoch": 0.06, "learning_rate": 0.0009469587546140267, "loss": 0.104, "theoretical_loss": 4.3669337704264635, "tokens_seen": 206307328 }, { "epoch": 0.06, "learning_rate": 0.0009468785106724443, "loss": 0.1068, "theoretical_loss": 4.366247822857533, "tokens_seen": 206569472 }, { "epoch": 0.06, "learning_rate": 0.0009467982667308619, "loss": 0.1048, "theoretical_loss": 4.365562988614176, "tokens_seen": 206831616 }, { "epoch": 0.06, "learning_rate": 0.0009467180227892794, "loss": 0.1034, "theoretical_loss": 4.364879264481787, "tokens_seen": 207093760 }, { "epoch": 0.06, "learning_rate": 0.000946637778847697, "loss": 0.1035, "theoretical_loss": 4.364196647259092, "tokens_seen": 207355904 }, { "epoch": 0.06, "learning_rate": 0.0009465575349061146, "loss": 0.1061, "theoretical_loss": 4.363515133758084, "tokens_seen": 207618048 }, { "epoch": 0.06, "learning_rate": 0.0009464772909645321, "loss": 0.1075, "theoretical_loss": 4.3628347208039475, "tokens_seen": 207880192 }, { "epoch": 0.06, "learning_rate": 0.0009463970470229499, "loss": 0.1018, "theoretical_loss": 4.362155405234985, "tokens_seen": 208142336 }, { "epoch": 0.06, "learning_rate": 0.0009463168030813674, "loss": 0.109, "theoretical_loss": 4.361477183902554, "tokens_seen": 208404480 }, { "epoch": 0.06, "learning_rate": 0.000946236559139785, "loss": 0.1052, "theoretical_loss": 4.360800053670989, "tokens_seen": 208666624 }, { "epoch": 0.06, "learning_rate": 0.0009461563151982026, "loss": 0.1031, "theoretical_loss": 4.360124011417536, "tokens_seen": 208928768 }, { "epoch": 0.06, "learning_rate": 0.0009460760712566201, "loss": 0.1047, "theoretical_loss": 4.359449054032282, "tokens_seen": 209190912 }, { "epoch": 0.06, "learning_rate": 0.0009459958273150377, "loss": 0.1042, "theoretical_loss": 4.358775178418089, "tokens_seen": 209453056 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.0007151683676056564, "objective/train/docs_used": 82917, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.0514161586761475, "objective/train/original_loss": 2.0514163970947266, "objective/train/theoretical_loss": 4.358102381490517, "objective/train/tokens_used": 230175200, "objective/train/value_avg": -0.01009368896484375, "objective/train/value_loss": 0.0004249107150826603, "objective/train/value_max": -0.0008263587951660156, "objective/train/value_min": -0.296142578125, "objective/train/value_reward_corr": 0.48272459031421056, "objective/train/value_std": 0.01027679443359375, "objective/train/weight_avg": 1.000907063484192, "objective/train/weighted_lm_loss": 2.0531537532806396, "objective/train/weights_max": 1.2134668827056885, "objective/train/weights_min": 0.370398610830307, "theoretical_loss": 4.358102381490517, "tokens_seen": 209715200 }, { "epoch": 0.06, "learning_rate": 0.0009459155833734553, "loss": 0.1048, "theoretical_loss": 4.358102381490517, "tokens_seen": 209715200 }, { "epoch": 0.06, "learning_rate": 0.0009458353394318729, "loss": 0.1051, "theoretical_loss": 4.3574306601777675, "tokens_seen": 209977344 }, { "epoch": 0.06, "learning_rate": 0.0009457550954902905, "loss": 0.1009, "theoretical_loss": 4.356760011420608, "tokens_seen": 210239488 }, { "epoch": 0.06, "learning_rate": 0.0009456748515487081, "loss": 0.1085, "theoretical_loss": 4.3560904321723095, "tokens_seen": 210501632 }, { "epoch": 0.06, "learning_rate": 0.0009455946076071257, "loss": 0.1056, "theoretical_loss": 4.355421919398576, "tokens_seen": 210763776 }, { "epoch": 0.06, "learning_rate": 0.0009455143636655433, "loss": 0.1054, "theoretical_loss": 4.354754470077481, "tokens_seen": 211025920 }, { "epoch": 0.06, "learning_rate": 0.0009454341197239609, "loss": 0.1029, "theoretical_loss": 4.354088081199402, "tokens_seen": 211288064 }, { "epoch": 0.06, "learning_rate": 0.0009453538757823784, "loss": 0.1038, "theoretical_loss": 4.3534227497669535, "tokens_seen": 211550208 }, { "epoch": 0.06, "learning_rate": 0.0009452736318407961, "loss": 0.1031, "theoretical_loss": 4.352758472794923, "tokens_seen": 211812352 }, { "epoch": 0.06, "learning_rate": 0.0009451933878992136, "loss": 0.1055, "theoretical_loss": 4.352095247310208, "tokens_seen": 212074496 }, { "epoch": 0.06, "learning_rate": 0.0009451131439576312, "loss": 0.1042, "theoretical_loss": 4.351433070351748, "tokens_seen": 212336640 }, { "epoch": 0.06, "learning_rate": 0.0009450329000160488, "loss": 0.1056, "theoretical_loss": 4.350771938970466, "tokens_seen": 212598784 }, { "epoch": 0.06, "learning_rate": 0.0009449526560744663, "loss": 0.1022, "theoretical_loss": 4.350111850229202, "tokens_seen": 212860928 }, { "epoch": 0.06, "learning_rate": 0.000944872412132884, "loss": 0.101, "theoretical_loss": 4.34945280120265, "tokens_seen": 213123072 }, { "epoch": 0.06, "learning_rate": 0.0009447921681913016, "loss": 0.1025, "theoretical_loss": 4.348794788977298, "tokens_seen": 213385216 }, { "epoch": 0.06, "learning_rate": 0.0009447119242497192, "loss": 0.1035, "theoretical_loss": 4.348137810651366, "tokens_seen": 213647360 }, { "epoch": 0.06, "learning_rate": 0.0009446316803081368, "loss": 0.1016, "theoretical_loss": 4.347481863334738, "tokens_seen": 213909504 }, { "epoch": 0.06, "learning_rate": 0.0009445514363665544, "loss": 0.104, "theoretical_loss": 4.346826944148912, "tokens_seen": 214171648 }, { "epoch": 0.06, "learning_rate": 0.0009444711924249719, "loss": 0.1048, "theoretical_loss": 4.3461730502269305, "tokens_seen": 214433792 }, { "epoch": 0.07, "learning_rate": 0.0009443909484833896, "loss": 0.1036, "theoretical_loss": 4.345520178713323, "tokens_seen": 214695936 }, { "epoch": 0.07, "learning_rate": 0.0009443107045418071, "loss": 0.1078, "theoretical_loss": 4.344868326764045, "tokens_seen": 214958080 }, { "epoch": 0.07, "learning_rate": 0.0009442304606002246, "loss": 0.1053, "theoretical_loss": 4.344217491546422, "tokens_seen": 215220224 }, { "epoch": 0.07, "learning_rate": 0.0009441502166586424, "loss": 0.1035, "theoretical_loss": 4.343567670239084, "tokens_seen": 215482368 }, { "epoch": 0.07, "learning_rate": 0.0009440699727170599, "loss": 0.1054, "theoretical_loss": 4.342918860031914, "tokens_seen": 215744512 }, { "epoch": 0.07, "learning_rate": 0.0009439897287754775, "loss": 0.1003, "theoretical_loss": 4.342271058125983, "tokens_seen": 216006656 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.0016028911340981722, "objective/train/docs_used": 85191, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.0327014923095703, "objective/train/original_loss": 2.032701253890991, "objective/train/theoretical_loss": 4.341624261733497, "objective/train/tokens_used": 236728800, "objective/train/value_avg": -0.00833892822265625, "objective/train/value_loss": 0.00031526113161817193, "objective/train/value_max": -0.0006666183471679688, "objective/train/value_min": -0.25830078125, "objective/train/value_reward_corr": 0.44630488005091173, "objective/train/value_std": 0.008331298828125, "objective/train/weight_avg": 1.0017452239990234, "objective/train/weighted_lm_loss": 2.0359926223754883, "objective/train/weights_max": 1.1291202306747437, "objective/train/weights_min": 0.36889123916625977, "theoretical_loss": 4.341624261733497, "tokens_seen": 216268800 }, { "epoch": 0.07, "learning_rate": 0.0009439094848338951, "loss": 0.1044, "theoretical_loss": 4.341624261733497, "tokens_seen": 216268800 }, { "epoch": 0.07, "learning_rate": 0.0009438292408923127, "loss": 0.1056, "theoretical_loss": 4.340978468077735, "tokens_seen": 216530944 }, { "epoch": 0.07, "learning_rate": 0.0009437489969507302, "loss": 0.1036, "theoretical_loss": 4.340333674392992, "tokens_seen": 216793088 }, { "epoch": 0.07, "learning_rate": 0.0009436687530091478, "loss": 0.105, "theoretical_loss": 4.339689877924531, "tokens_seen": 217055232 }, { "epoch": 0.07, "learning_rate": 0.0009435885090675654, "loss": 0.1036, "theoretical_loss": 4.3390470759285105, "tokens_seen": 217317376 }, { "epoch": 0.07, "learning_rate": 0.0009435082651259829, "loss": 0.1036, "theoretical_loss": 4.338405265671941, "tokens_seen": 217579520 }, { "epoch": 0.07, "learning_rate": 0.0009434280211844006, "loss": 0.099, "theoretical_loss": 4.337764444432625, "tokens_seen": 217841664 }, { "epoch": 0.07, "learning_rate": 0.0009433477772428182, "loss": 0.1036, "theoretical_loss": 4.337124609499101, "tokens_seen": 218103808 }, { "epoch": 0.07, "learning_rate": 0.0009432675333012359, "loss": 0.1, "theoretical_loss": 4.336485758170589, "tokens_seen": 218365952 }, { "epoch": 0.07, "learning_rate": 0.0009431872893596534, "loss": 0.1058, "theoretical_loss": 4.335847887756934, "tokens_seen": 218628096 }, { "epoch": 0.07, "learning_rate": 0.0009431070454180709, "loss": 0.1014, "theoretical_loss": 4.335210995578553, "tokens_seen": 218890240 }, { "epoch": 0.07, "learning_rate": 0.0009430268014764886, "loss": 0.1035, "theoretical_loss": 4.334575078966383, "tokens_seen": 219152384 }, { "epoch": 0.07, "learning_rate": 0.0009429465575349061, "loss": 0.1044, "theoretical_loss": 4.333940135261823, "tokens_seen": 219414528 }, { "epoch": 0.07, "learning_rate": 0.0009428663135933237, "loss": 0.1014, "theoretical_loss": 4.333306161816684, "tokens_seen": 219676672 }, { "epoch": 0.07, "learning_rate": 0.0009427860696517413, "loss": 0.1056, "theoretical_loss": 4.332673155993131, "tokens_seen": 219938816 }, { "epoch": 0.07, "learning_rate": 0.000942705825710159, "loss": 0.1012, "theoretical_loss": 4.332041115163636, "tokens_seen": 220200960 }, { "epoch": 0.07, "learning_rate": 0.0009426255817685765, "loss": 0.1045, "theoretical_loss": 4.331410036710925, "tokens_seen": 220463104 }, { "epoch": 0.07, "learning_rate": 0.0009425453378269941, "loss": 0.1026, "theoretical_loss": 4.330779918027919, "tokens_seen": 220725248 }, { "epoch": 0.07, "learning_rate": 0.0009424650938854117, "loss": 0.1044, "theoretical_loss": 4.330150756517692, "tokens_seen": 220987392 }, { "epoch": 0.07, "learning_rate": 0.0009423848499438292, "loss": 0.1022, "theoretical_loss": 4.3295225495934115, "tokens_seen": 221249536 }, { "epoch": 0.07, "learning_rate": 0.0009423046060022469, "loss": 0.0974, "theoretical_loss": 4.328895294678292, "tokens_seen": 221511680 }, { "epoch": 0.07, "learning_rate": 0.0009422243620606644, "loss": 0.1027, "theoretical_loss": 4.32826898920554, "tokens_seen": 221773824 }, { "epoch": 0.07, "learning_rate": 0.0009421441181190821, "loss": 0.1052, "theoretical_loss": 4.3276436306183115, "tokens_seen": 222035968 }, { "epoch": 0.07, "learning_rate": 0.0009420638741774996, "loss": 0.0996, "theoretical_loss": 4.327019216369651, "tokens_seen": 222298112 }, { "epoch": 0.07, "learning_rate": 0.0009419836302359171, "loss": 0.101, "theoretical_loss": 4.32639574392245, "tokens_seen": 222560256 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.002008789451792836, "objective/train/docs_used": 87601, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8882607221603394, "objective/train/original_loss": 1.8882604837417603, "objective/train/theoretical_loss": 4.325773210749392, "objective/train/tokens_used": 243282400, "objective/train/value_avg": -0.01070404052734375, "objective/train/value_loss": 0.0003292278270237148, "objective/train/value_max": -0.0006799697875976562, "objective/train/value_min": -0.82373046875, "objective/train/value_reward_corr": 0.602646911223948, "objective/train/value_std": 0.01348876953125, "objective/train/weight_avg": 1.002161979675293, "objective/train/weighted_lm_loss": 1.892277717590332, "objective/train/weights_max": 1.4122933149337769, "objective/train/weights_min": 0.370164155960083, "theoretical_loss": 4.325773210749392, "tokens_seen": 222822400 }, { "epoch": 0.07, "learning_rate": 0.0009419033862943349, "loss": 0.0975, "theoretical_loss": 4.325773210749392, "tokens_seen": 222822400 }, { "epoch": 0.07, "learning_rate": 0.0009418231423527524, "loss": 0.1018, "theoretical_loss": 4.325151614332908, "tokens_seen": 223084544 }, { "epoch": 0.07, "learning_rate": 0.00094174289841117, "loss": 0.1032, "theoretical_loss": 4.3245309521651265, "tokens_seen": 223346688 }, { "epoch": 0.07, "learning_rate": 0.0009416626544695876, "loss": 0.103, "theoretical_loss": 4.323911221747817, "tokens_seen": 223608832 }, { "epoch": 0.07, "learning_rate": 0.0009415824105280052, "loss": 0.1049, "theoretical_loss": 4.323292420592356, "tokens_seen": 223870976 }, { "epoch": 0.07, "learning_rate": 0.0009415021665864227, "loss": 0.1024, "theoretical_loss": 4.322674546219666, "tokens_seen": 224133120 }, { "epoch": 0.07, "learning_rate": 0.0009414219226448404, "loss": 0.1029, "theoretical_loss": 4.322057596160174, "tokens_seen": 224395264 }, { "epoch": 0.07, "learning_rate": 0.0009413416787032579, "loss": 0.0989, "theoretical_loss": 4.321441567953762, "tokens_seen": 224657408 }, { "epoch": 0.07, "learning_rate": 0.0009412614347616754, "loss": 0.1013, "theoretical_loss": 4.320826459149725, "tokens_seen": 224919552 }, { "epoch": 0.07, "learning_rate": 0.0009411811908200932, "loss": 0.1075, "theoretical_loss": 4.3202122673067125, "tokens_seen": 225181696 }, { "epoch": 0.07, "learning_rate": 0.0009411009468785107, "loss": 0.1004, "theoretical_loss": 4.319598989992695, "tokens_seen": 225443840 }, { "epoch": 0.07, "learning_rate": 0.0009410207029369283, "loss": 0.1046, "theoretical_loss": 4.318986624784908, "tokens_seen": 225705984 }, { "epoch": 0.07, "learning_rate": 0.0009409404589953459, "loss": 0.103, "theoretical_loss": 4.318375169269813, "tokens_seen": 225968128 }, { "epoch": 0.07, "learning_rate": 0.0009408602150537635, "loss": 0.1013, "theoretical_loss": 4.317764621043046, "tokens_seen": 226230272 }, { "epoch": 0.07, "learning_rate": 0.0009407799711121811, "loss": 0.0995, "theoretical_loss": 4.317154977709375, "tokens_seen": 226492416 }, { "epoch": 0.07, "learning_rate": 0.0009406997271705986, "loss": 0.0999, "theoretical_loss": 4.3165462368826555, "tokens_seen": 226754560 }, { "epoch": 0.07, "learning_rate": 0.0009406194832290162, "loss": 0.0995, "theoretical_loss": 4.315938396185782, "tokens_seen": 227016704 }, { "epoch": 0.07, "learning_rate": 0.0009405392392874338, "loss": 0.1, "theoretical_loss": 4.315331453250648, "tokens_seen": 227278848 }, { "epoch": 0.07, "learning_rate": 0.0009404589953458514, "loss": 0.0992, "theoretical_loss": 4.314725405718099, "tokens_seen": 227540992 }, { "epoch": 0.07, "learning_rate": 0.000940378751404269, "loss": 0.0997, "theoretical_loss": 4.314120251237887, "tokens_seen": 227803136 }, { "epoch": 0.07, "learning_rate": 0.0009402985074626867, "loss": 0.1027, "theoretical_loss": 4.31351598746863, "tokens_seen": 228065280 }, { "epoch": 0.07, "learning_rate": 0.0009402182635211042, "loss": 0.1014, "theoretical_loss": 4.312912612077767, "tokens_seen": 228327424 }, { "epoch": 0.07, "learning_rate": 0.0009401380195795217, "loss": 0.1017, "theoretical_loss": 4.312310122741512, "tokens_seen": 228589568 }, { "epoch": 0.07, "learning_rate": 0.0009400577756379394, "loss": 0.1026, "theoretical_loss": 4.311708517144817, "tokens_seen": 228851712 }, { "epoch": 0.07, "learning_rate": 0.0009399775316963569, "loss": 0.102, "theoretical_loss": 4.311107792981323, "tokens_seen": 229113856 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.0002358820493100211, "objective/train/docs_used": 90061, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.085197687149048, "objective/train/original_loss": 2.085197925567627, "objective/train/theoretical_loss": 4.310507947953321, "objective/train/tokens_used": 249836000, "objective/train/value_avg": -0.0097808837890625, "objective/train/value_loss": 0.0005574537208303809, "objective/train/value_max": -0.0007238388061523438, "objective/train/value_min": -0.52783203125, "objective/train/value_reward_corr": 0.7022453186963163, "objective/train/value_std": 0.01198577880859375, "objective/train/weight_avg": 1.0004955530166626, "objective/train/weighted_lm_loss": 2.0866754055023193, "objective/train/weights_max": 1.2493910789489746, "objective/train/weights_min": 0.37043818831443787, "theoretical_loss": 4.310507947953321, "tokens_seen": 229376000 }, { "epoch": 0.07, "learning_rate": 0.0009398972877547745, "loss": 0.103, "theoretical_loss": 4.310507947953321, "tokens_seen": 229376000 }, { "epoch": 0.07, "learning_rate": 0.0009398170438131921, "loss": 0.1039, "theoretical_loss": 4.309908979771709, "tokens_seen": 229638144 }, { "epoch": 0.07, "learning_rate": 0.0009397367998716097, "loss": 0.1039, "theoretical_loss": 4.3093108861559495, "tokens_seen": 229900288 }, { "epoch": 0.07, "learning_rate": 0.0009396565559300272, "loss": 0.1018, "theoretical_loss": 4.308713664834029, "tokens_seen": 230162432 }, { "epoch": 0.07, "learning_rate": 0.0009395763119884449, "loss": 0.0956, "theoretical_loss": 4.308117313542413, "tokens_seen": 230424576 }, { "epoch": 0.07, "learning_rate": 0.0009394960680468625, "loss": 0.102, "theoretical_loss": 4.30752183002601, "tokens_seen": 230686720 }, { "epoch": 0.07, "learning_rate": 0.0009394158241052801, "loss": 0.0988, "theoretical_loss": 4.3069272120381275, "tokens_seen": 230948864 }, { "epoch": 0.07, "learning_rate": 0.0009393355801636977, "loss": 0.102, "theoretical_loss": 4.30633345734043, "tokens_seen": 231211008 }, { "epoch": 0.07, "learning_rate": 0.0009392553362221152, "loss": 0.0999, "theoretical_loss": 4.3057405637029, "tokens_seen": 231473152 }, { "epoch": 0.07, "learning_rate": 0.0009391750922805329, "loss": 0.1013, "theoretical_loss": 4.305148528903798, "tokens_seen": 231735296 }, { "epoch": 0.07, "learning_rate": 0.0009390948483389504, "loss": 0.1018, "theoretical_loss": 4.304557350729623, "tokens_seen": 231997440 }, { "epoch": 0.07, "learning_rate": 0.0009390146043973679, "loss": 0.0998, "theoretical_loss": 4.303967026975072, "tokens_seen": 232259584 }, { "epoch": 0.07, "learning_rate": 0.0009389343604557857, "loss": 0.1022, "theoretical_loss": 4.303377555442998, "tokens_seen": 232521728 }, { "epoch": 0.07, "learning_rate": 0.0009388541165142032, "loss": 0.0975, "theoretical_loss": 4.302788933944375, "tokens_seen": 232783872 }, { "epoch": 0.07, "learning_rate": 0.0009387738725726208, "loss": 0.1006, "theoretical_loss": 4.302201160298255, "tokens_seen": 233046016 }, { "epoch": 0.07, "learning_rate": 0.0009386936286310384, "loss": 0.101, "theoretical_loss": 4.301614232331733, "tokens_seen": 233308160 }, { "epoch": 0.07, "learning_rate": 0.000938613384689456, "loss": 0.1004, "theoretical_loss": 4.301028147879904, "tokens_seen": 233570304 }, { "epoch": 0.07, "learning_rate": 0.0009385331407478735, "loss": 0.0979, "theoretical_loss": 4.300442904785831, "tokens_seen": 233832448 }, { "epoch": 0.07, "learning_rate": 0.0009384528968062911, "loss": 0.1012, "theoretical_loss": 4.299858500900495, "tokens_seen": 234094592 }, { "epoch": 0.07, "learning_rate": 0.0009383726528647087, "loss": 0.1021, "theoretical_loss": 4.2992749340827725, "tokens_seen": 234356736 }, { "epoch": 0.07, "learning_rate": 0.0009382924089231262, "loss": 0.1026, "theoretical_loss": 4.298692202199386, "tokens_seen": 234618880 }, { "epoch": 0.07, "learning_rate": 0.000938212164981544, "loss": 0.1007, "theoretical_loss": 4.298110303124871, "tokens_seen": 234881024 }, { "epoch": 0.07, "learning_rate": 0.0009381319210399615, "loss": 0.102, "theoretical_loss": 4.29752923474154, "tokens_seen": 235143168 }, { "epoch": 0.07, "learning_rate": 0.0009380516770983792, "loss": 0.1003, "theoretical_loss": 4.29694899493944, "tokens_seen": 235405312 }, { "epoch": 0.07, "learning_rate": 0.0009379714331567967, "loss": 0.1013, "theoretical_loss": 4.2963695816163225, "tokens_seen": 235667456 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.001282443176023662, "objective/train/docs_used": 92471, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.023468494415283, "objective/train/original_loss": 2.023468494415283, "objective/train/theoretical_loss": 4.295790992677603, "objective/train/tokens_used": 256389600, "objective/train/value_avg": -0.00916290283203125, "objective/train/value_loss": 0.00042869814205914736, "objective/train/value_max": -0.0003845691680908203, "objective/train/value_min": -0.7470703125, "objective/train/value_reward_corr": 0.596518820655821, "objective/train/value_std": 0.0174102783203125, "objective/train/weight_avg": 1.0014816522598267, "objective/train/weighted_lm_loss": 2.02492618560791, "objective/train/weights_max": 1.7296583652496338, "objective/train/weights_min": 0.3692615330219269, "theoretical_loss": 4.295790992677603, "tokens_seen": 235929600 }, { "epoch": 0.07, "learning_rate": 0.0009378911892152143, "loss": 0.0992, "theoretical_loss": 4.295790992677603, "tokens_seen": 235929600 }, { "epoch": 0.07, "learning_rate": 0.0009378109452736319, "loss": 0.1008, "theoretical_loss": 4.2952132260363225, "tokens_seen": 236191744 }, { "epoch": 0.07, "learning_rate": 0.0009377307013320494, "loss": 0.0992, "theoretical_loss": 4.294636279613117, "tokens_seen": 236453888 }, { "epoch": 0.07, "learning_rate": 0.000937650457390467, "loss": 0.1033, "theoretical_loss": 4.294060151336178, "tokens_seen": 236716032 }, { "epoch": 0.07, "learning_rate": 0.0009375702134488846, "loss": 0.1017, "theoretical_loss": 4.293484839141217, "tokens_seen": 236978176 }, { "epoch": 0.07, "learning_rate": 0.0009374899695073022, "loss": 0.0994, "theoretical_loss": 4.29291034097143, "tokens_seen": 237240320 }, { "epoch": 0.07, "learning_rate": 0.0009374097255657198, "loss": 0.0964, "theoretical_loss": 4.2923366547774595, "tokens_seen": 237502464 }, { "epoch": 0.07, "learning_rate": 0.0009373294816241375, "loss": 0.0991, "theoretical_loss": 4.2917637785173675, "tokens_seen": 237764608 }, { "epoch": 0.07, "learning_rate": 0.000937249237682555, "loss": 0.1, "theoretical_loss": 4.291191710156591, "tokens_seen": 238026752 }, { "epoch": 0.07, "learning_rate": 0.0009371689937409725, "loss": 0.0996, "theoretical_loss": 4.290620447667912, "tokens_seen": 238288896 }, { "epoch": 0.07, "learning_rate": 0.0009370887497993902, "loss": 0.0973, "theoretical_loss": 4.290049989031424, "tokens_seen": 238551040 }, { "epoch": 0.07, "learning_rate": 0.0009370085058578077, "loss": 0.1026, "theoretical_loss": 4.289480332234493, "tokens_seen": 238813184 }, { "epoch": 0.07, "learning_rate": 0.0009369282619162254, "loss": 0.1009, "theoretical_loss": 4.288911475271731, "tokens_seen": 239075328 }, { "epoch": 0.07, "learning_rate": 0.0009368480179746429, "loss": 0.0999, "theoretical_loss": 4.288343416144952, "tokens_seen": 239337472 }, { "epoch": 0.07, "learning_rate": 0.0009367677740330605, "loss": 0.1019, "theoretical_loss": 4.287776152863146, "tokens_seen": 239599616 }, { "epoch": 0.07, "learning_rate": 0.0009366875300914782, "loss": 0.1016, "theoretical_loss": 4.287209683442444, "tokens_seen": 239861760 }, { "epoch": 0.07, "learning_rate": 0.0009366072861498957, "loss": 0.1001, "theoretical_loss": 4.286644005906081, "tokens_seen": 240123904 }, { "epoch": 0.07, "learning_rate": 0.0009365270422083133, "loss": 0.1011, "theoretical_loss": 4.286079118284368, "tokens_seen": 240386048 }, { "epoch": 0.07, "learning_rate": 0.0009364467982667309, "loss": 0.1036, "theoretical_loss": 4.285515018614655, "tokens_seen": 240648192 }, { "epoch": 0.07, "learning_rate": 0.0009363665543251485, "loss": 0.0961, "theoretical_loss": 4.2849517049412995, "tokens_seen": 240910336 }, { "epoch": 0.07, "learning_rate": 0.000936286310383566, "loss": 0.0975, "theoretical_loss": 4.284389175315636, "tokens_seen": 241172480 }, { "epoch": 0.07, "learning_rate": 0.0009362060664419837, "loss": 0.0993, "theoretical_loss": 4.283827427795939, "tokens_seen": 241434624 }, { "epoch": 0.07, "learning_rate": 0.0009361258225004012, "loss": 0.0984, "theoretical_loss": 4.283266460447394, "tokens_seen": 241696768 }, { "epoch": 0.07, "learning_rate": 0.0009360455785588187, "loss": 0.0963, "theoretical_loss": 4.282706271342066, "tokens_seen": 241958912 }, { "epoch": 0.07, "learning_rate": 0.0009359653346172365, "loss": 0.1022, "theoretical_loss": 4.282146858558866, "tokens_seen": 242221056 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.0023586128372699022, "objective/train/docs_used": 94878, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.9628684520721436, "objective/train/original_loss": 1.9628684520721436, "objective/train/theoretical_loss": 4.281588220183519, "objective/train/tokens_used": 262943200, "objective/train/value_avg": -0.010406494140625, "objective/train/value_loss": 0.00042588188080117106, "objective/train/value_max": -0.0003249645233154297, "objective/train/value_min": -0.403564453125, "objective/train/value_reward_corr": 0.6426931088980384, "objective/train/value_std": 0.01442718505859375, "objective/train/weight_avg": 1.0025511980056763, "objective/train/weighted_lm_loss": 1.9680471420288086, "objective/train/weights_max": 1.2811800241470337, "objective/train/weights_min": 0.3709741532802582, "theoretical_loss": 4.281588220183519, "tokens_seen": 242483200 }, { "epoch": 0.07, "learning_rate": 0.000935885090675654, "loss": 0.0976, "theoretical_loss": 4.281588220183519, "tokens_seen": 242483200 }, { "epoch": 0.07, "learning_rate": 0.0009358048467340716, "loss": 0.0992, "theoretical_loss": 4.281030354308533, "tokens_seen": 242745344 }, { "epoch": 0.07, "learning_rate": 0.0009357246027924892, "loss": 0.0984, "theoretical_loss": 4.280473259033169, "tokens_seen": 243007488 }, { "epoch": 0.07, "learning_rate": 0.0009356443588509068, "loss": 0.0976, "theoretical_loss": 4.27991693246341, "tokens_seen": 243269632 }, { "epoch": 0.07, "learning_rate": 0.0009355641149093244, "loss": 0.0983, "theoretical_loss": 4.279361372711923, "tokens_seen": 243531776 }, { "epoch": 0.07, "learning_rate": 0.0009354838709677419, "loss": 0.1021, "theoretical_loss": 4.278806577898042, "tokens_seen": 243793920 }, { "epoch": 0.07, "learning_rate": 0.0009354036270261595, "loss": 0.0976, "theoretical_loss": 4.278252546147724, "tokens_seen": 244056064 }, { "epoch": 0.07, "learning_rate": 0.0009353233830845771, "loss": 0.0992, "theoretical_loss": 4.277699275593523, "tokens_seen": 244318208 }, { "epoch": 0.07, "learning_rate": 0.0009352431391429948, "loss": 0.0946, "theoretical_loss": 4.277146764374566, "tokens_seen": 244580352 }, { "epoch": 0.07, "learning_rate": 0.0009351628952014123, "loss": 0.0996, "theoretical_loss": 4.276595010636514, "tokens_seen": 244842496 }, { "epoch": 0.07, "learning_rate": 0.00093508265125983, "loss": 0.0991, "theoretical_loss": 4.276044012531534, "tokens_seen": 245104640 }, { "epoch": 0.07, "learning_rate": 0.0009350024073182475, "loss": 0.1007, "theoretical_loss": 4.275493768218274, "tokens_seen": 245366784 }, { "epoch": 0.07, "learning_rate": 0.0009349221633766651, "loss": 0.0973, "theoretical_loss": 4.274944275861828, "tokens_seen": 245628928 }, { "epoch": 0.07, "learning_rate": 0.0009348419194350827, "loss": 0.0975, "theoretical_loss": 4.274395533633712, "tokens_seen": 245891072 }, { "epoch": 0.07, "learning_rate": 0.0009347616754935002, "loss": 0.0955, "theoretical_loss": 4.273847539711825, "tokens_seen": 246153216 }, { "epoch": 0.07, "learning_rate": 0.0009346814315519178, "loss": 0.1006, "theoretical_loss": 4.273300292280435, "tokens_seen": 246415360 }, { "epoch": 0.07, "learning_rate": 0.0009346011876103354, "loss": 0.0985, "theoretical_loss": 4.272753789530134, "tokens_seen": 246677504 }, { "epoch": 0.07, "learning_rate": 0.000934520943668753, "loss": 0.0955, "theoretical_loss": 4.272208029657822, "tokens_seen": 246939648 }, { "epoch": 0.07, "learning_rate": 0.0009344406997271707, "loss": 0.0968, "theoretical_loss": 4.271663010866669, "tokens_seen": 247201792 }, { "epoch": 0.07, "learning_rate": 0.0009343604557855883, "loss": 0.0999, "theoretical_loss": 4.2711187313660925, "tokens_seen": 247463936 }, { "epoch": 0.08, "learning_rate": 0.0009342802118440058, "loss": 0.0981, "theoretical_loss": 4.270575189371727, "tokens_seen": 247726080 }, { "epoch": 0.08, "learning_rate": 0.0009341999679024234, "loss": 0.0988, "theoretical_loss": 4.270032383105398, "tokens_seen": 247988224 }, { "epoch": 0.08, "learning_rate": 0.000934119723960841, "loss": 0.097, "theoretical_loss": 4.269490310795089, "tokens_seen": 248250368 }, { "epoch": 0.08, "learning_rate": 0.0009340394800192585, "loss": 0.1021, "theoretical_loss": 4.268948970674917, "tokens_seen": 248512512 }, { "epoch": 0.08, "learning_rate": 0.0009339592360776762, "loss": 0.101, "theoretical_loss": 4.268408360985109, "tokens_seen": 248774656 }, { "epoch": 0.08, "objective/train/advantage_avg": 5.6569744629086927e-05, "objective/train/docs_used": 97104, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.9197639226913452, "objective/train/original_loss": 1.9197638034820557, "objective/train/theoretical_loss": 4.267868479971966, "objective/train/tokens_used": 269496800, "objective/train/value_avg": -0.00948333740234375, "objective/train/value_loss": 0.0004118235665373504, "objective/train/value_max": -0.0005464553833007812, "objective/train/value_min": -0.8017578125, "objective/train/value_reward_corr": 0.6645390257593566, "objective/train/value_std": 0.015777587890625, "objective/train/weight_avg": 1.0002477169036865, "objective/train/weighted_lm_loss": 1.9196513891220093, "objective/train/weights_max": 1.9581830501556396, "objective/train/weights_min": 0.38205578923225403, "theoretical_loss": 4.267868479971966, "tokens_seen": 249036800 }, { "epoch": 0.08, "learning_rate": 0.0009338789921360937, "loss": 0.0993, "theoretical_loss": 4.267868479971966, "tokens_seen": 249036800 }, { "epoch": 0.08, "learning_rate": 0.0009337987481945113, "loss": 0.0985, "theoretical_loss": 4.267329325887841, "tokens_seen": 249298944 }, { "epoch": 0.08, "learning_rate": 0.000933718504252929, "loss": 0.0921, "theoretical_loss": 4.266790896991109, "tokens_seen": 249561088 }, { "epoch": 0.08, "learning_rate": 0.0009336382603113465, "loss": 0.1, "theoretical_loss": 4.266253191546146, "tokens_seen": 249823232 }, { "epoch": 0.08, "learning_rate": 0.0009335580163697641, "loss": 0.0989, "theoretical_loss": 4.265716207823292, "tokens_seen": 250085376 }, { "epoch": 0.08, "learning_rate": 0.0009334777724281817, "loss": 0.099, "theoretical_loss": 4.2651799440988345, "tokens_seen": 250347520 }, { "epoch": 0.08, "learning_rate": 0.0009333975284865993, "loss": 0.0971, "theoretical_loss": 4.2646443986549745, "tokens_seen": 250609664 }, { "epoch": 0.08, "learning_rate": 0.0009333172845450168, "loss": 0.0986, "theoretical_loss": 4.264109569779803, "tokens_seen": 250871808 }, { "epoch": 0.08, "learning_rate": 0.0009332370406034345, "loss": 0.1009, "theoretical_loss": 4.263575455767277, "tokens_seen": 251133952 }, { "epoch": 0.08, "learning_rate": 0.000933156796661852, "loss": 0.0992, "theoretical_loss": 4.263042054917186, "tokens_seen": 251396096 }, { "epoch": 0.08, "learning_rate": 0.0009330765527202696, "loss": 0.0989, "theoretical_loss": 4.262509365535134, "tokens_seen": 251658240 }, { "epoch": 0.08, "learning_rate": 0.0009329963087786873, "loss": 0.0963, "theoretical_loss": 4.261977385932512, "tokens_seen": 251920384 }, { "epoch": 0.08, "learning_rate": 0.0009329160648371048, "loss": 0.0979, "theoretical_loss": 4.261446114426466, "tokens_seen": 252182528 }, { "epoch": 0.08, "learning_rate": 0.0009328358208955225, "loss": 0.097, "theoretical_loss": 4.260915549339879, "tokens_seen": 252444672 }, { "epoch": 0.08, "learning_rate": 0.00093275557695394, "loss": 0.0967, "theoretical_loss": 4.2603856890013425, "tokens_seen": 252706816 }, { "epoch": 0.08, "learning_rate": 0.0009326753330123576, "loss": 0.0988, "theoretical_loss": 4.25985653174513, "tokens_seen": 252968960 }, { "epoch": 0.08, "learning_rate": 0.0009325950890707752, "loss": 0.0945, "theoretical_loss": 4.259328075911173, "tokens_seen": 253231104 }, { "epoch": 0.08, "learning_rate": 0.0009325148451291927, "loss": 0.0985, "theoretical_loss": 4.258800319845038, "tokens_seen": 253493248 }, { "epoch": 0.08, "learning_rate": 0.0009324346011876103, "loss": 0.0971, "theoretical_loss": 4.258273261897896, "tokens_seen": 253755392 }, { "epoch": 0.08, "learning_rate": 0.0009323543572460279, "loss": 0.0967, "theoretical_loss": 4.257746900426506, "tokens_seen": 254017536 }, { "epoch": 0.08, "learning_rate": 0.0009322741133044456, "loss": 0.097, "theoretical_loss": 4.25722123379318, "tokens_seen": 254279680 }, { "epoch": 0.08, "learning_rate": 0.0009321938693628631, "loss": 0.1006, "theoretical_loss": 4.256696260365768, "tokens_seen": 254541824 }, { "epoch": 0.08, "learning_rate": 0.0009321136254212808, "loss": 0.0994, "theoretical_loss": 4.256171978517629, "tokens_seen": 254803968 }, { "epoch": 0.08, "learning_rate": 0.0009320333814796983, "loss": 0.0948, "theoretical_loss": 4.255648386627607, "tokens_seen": 255066112 }, { "epoch": 0.08, "learning_rate": 0.000931953137538116, "loss": 0.0982, "theoretical_loss": 4.255125483080007, "tokens_seen": 255328256 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0025481332559138536, "objective/train/docs_used": 99403, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.921761393547058, "objective/train/original_loss": 1.9217615127563477, "objective/train/theoretical_loss": 4.254603266264572, "objective/train/tokens_used": 276050400, "objective/train/value_avg": -0.008270263671875, "objective/train/value_loss": 0.000349750422174111, "objective/train/value_max": -0.0004355907440185547, "objective/train/value_min": -0.50537109375, "objective/train/value_reward_corr": 0.5029395423614991, "objective/train/value_std": 0.01042938232421875, "objective/train/weight_avg": 1.0027012825012207, "objective/train/weighted_lm_loss": 1.9272537231445312, "objective/train/weights_max": 1.5571717023849487, "objective/train/weights_min": 0.36996087431907654, "theoretical_loss": 4.254603266264572, "tokens_seen": 255590400 }, { "epoch": 0.08, "learning_rate": 0.0009318728935965335, "loss": 0.0981, "theoretical_loss": 4.254603266264572, "tokens_seen": 255590400 }, { "epoch": 0.08, "learning_rate": 0.000931792649654951, "loss": 0.0957, "theoretical_loss": 4.254081734576458, "tokens_seen": 255852544 }, { "epoch": 0.08, "learning_rate": 0.0009317124057133687, "loss": 0.0963, "theoretical_loss": 4.253560886416212, "tokens_seen": 256114688 }, { "epoch": 0.08, "learning_rate": 0.0009316321617717862, "loss": 0.0968, "theoretical_loss": 4.253040720189746, "tokens_seen": 256376832 }, { "epoch": 0.08, "learning_rate": 0.0009315519178302038, "loss": 0.0969, "theoretical_loss": 4.252521234308315, "tokens_seen": 256638976 }, { "epoch": 0.08, "learning_rate": 0.0009314716738886215, "loss": 0.0961, "theoretical_loss": 4.2520024271884935, "tokens_seen": 256901120 }, { "epoch": 0.08, "learning_rate": 0.0009313914299470391, "loss": 0.0978, "theoretical_loss": 4.251484297252151, "tokens_seen": 257163264 }, { "epoch": 0.08, "learning_rate": 0.0009313111860054566, "loss": 0.1015, "theoretical_loss": 4.250966842926434, "tokens_seen": 257425408 }, { "epoch": 0.08, "learning_rate": 0.0009312309420638742, "loss": 0.0991, "theoretical_loss": 4.250450062643734, "tokens_seen": 257687552 }, { "epoch": 0.08, "learning_rate": 0.0009311506981222918, "loss": 0.095, "theoretical_loss": 4.249933954841672, "tokens_seen": 257949696 }, { "epoch": 0.08, "learning_rate": 0.0009310704541807093, "loss": 0.1023, "theoretical_loss": 4.2494185179630755, "tokens_seen": 258211840 }, { "epoch": 0.08, "learning_rate": 0.000930990210239127, "loss": 0.0993, "theoretical_loss": 4.24890375045595, "tokens_seen": 258473984 }, { "epoch": 0.08, "learning_rate": 0.0009309099662975445, "loss": 0.0972, "theoretical_loss": 4.248389650773463, "tokens_seen": 258736128 }, { "epoch": 0.08, "learning_rate": 0.0009308297223559621, "loss": 0.0985, "theoretical_loss": 4.24787621737392, "tokens_seen": 258998272 }, { "epoch": 0.08, "learning_rate": 0.0009307494784143798, "loss": 0.0969, "theoretical_loss": 4.247363448720739, "tokens_seen": 259260416 }, { "epoch": 0.08, "learning_rate": 0.0009306692344727973, "loss": 0.0985, "theoretical_loss": 4.246851343282432, "tokens_seen": 259522560 }, { "epoch": 0.08, "learning_rate": 0.000930588990531215, "loss": 0.1008, "theoretical_loss": 4.246339899532582, "tokens_seen": 259784704 }, { "epoch": 0.08, "learning_rate": 0.0009305087465896325, "loss": 0.0969, "theoretical_loss": 4.245829115949818, "tokens_seen": 260046848 }, { "epoch": 0.08, "learning_rate": 0.0009304285026480501, "loss": 0.1007, "theoretical_loss": 4.245318991017802, "tokens_seen": 260308992 }, { "epoch": 0.08, "learning_rate": 0.0009303482587064677, "loss": 0.098, "theoretical_loss": 4.244809523225195, "tokens_seen": 260571136 }, { "epoch": 0.08, "learning_rate": 0.0009302680147648853, "loss": 0.0997, "theoretical_loss": 4.244300711065646, "tokens_seen": 260833280 }, { "epoch": 0.08, "learning_rate": 0.0009301877708233028, "loss": 0.0933, "theoretical_loss": 4.243792553037767, "tokens_seen": 261095424 }, { "epoch": 0.08, "learning_rate": 0.0009301075268817204, "loss": 0.0978, "theoretical_loss": 4.243285047645106, "tokens_seen": 261357568 }, { "epoch": 0.08, "learning_rate": 0.000930027282940138, "loss": 0.1023, "theoretical_loss": 4.242778193396136, "tokens_seen": 261619712 }, { "epoch": 0.08, "learning_rate": 0.0009299470389985556, "loss": 0.0989, "theoretical_loss": 4.242271988804228, "tokens_seen": 261881856 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.004064733162522316, "objective/train/docs_used": 101934, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.00811505317688, "objective/train/original_loss": 2.008115291595459, "objective/train/theoretical_loss": 4.241766432387629, "objective/train/tokens_used": 282604000, "objective/train/value_avg": -0.00791168212890625, "objective/train/value_loss": 0.00014644188922829926, "objective/train/value_max": -0.0006613731384277344, "objective/train/value_min": -0.193603515625, "objective/train/value_reward_corr": 0.33414470602713064, "objective/train/value_std": 0.006023406982421875, "objective/train/weight_avg": 1.0041327476501465, "objective/train/weighted_lm_loss": 2.0166854858398438, "objective/train/weights_max": 1.1917407512664795, "objective/train/weights_min": 0.37122613191604614, "theoretical_loss": 4.241766432387629, "tokens_seen": 262144000 }, { "epoch": 0.08, "learning_rate": 0.0009298667950569733, "loss": 0.0953, "theoretical_loss": 4.241766432387629, "tokens_seen": 262144000 }, { "epoch": 0.08, "learning_rate": 0.0009297865511153908, "loss": 0.0968, "theoretical_loss": 4.241261522669445, "tokens_seen": 262406144 }, { "epoch": 0.08, "learning_rate": 0.0009297063071738084, "loss": 0.0992, "theoretical_loss": 4.240757258177617, "tokens_seen": 262668288 }, { "epoch": 0.08, "learning_rate": 0.000929626063232226, "loss": 0.0957, "theoretical_loss": 4.240253637444903, "tokens_seen": 262930432 }, { "epoch": 0.08, "learning_rate": 0.0009295458192906435, "loss": 0.0951, "theoretical_loss": 4.239750659008854, "tokens_seen": 263192576 }, { "epoch": 0.08, "learning_rate": 0.0009294655753490611, "loss": 0.0968, "theoretical_loss": 4.2392483214117975, "tokens_seen": 263454720 }, { "epoch": 0.08, "learning_rate": 0.0009293853314074787, "loss": 0.0992, "theoretical_loss": 4.238746623200815, "tokens_seen": 263716864 }, { "epoch": 0.08, "learning_rate": 0.0009293050874658963, "loss": 0.0982, "theoretical_loss": 4.238245562927722, "tokens_seen": 263979008 }, { "epoch": 0.08, "learning_rate": 0.000929224843524314, "loss": 0.0987, "theoretical_loss": 4.237745139149047, "tokens_seen": 264241152 }, { "epoch": 0.08, "learning_rate": 0.0009291445995827316, "loss": 0.0959, "theoretical_loss": 4.237245350426015, "tokens_seen": 264503296 }, { "epoch": 0.08, "learning_rate": 0.0009290643556411491, "loss": 0.0935, "theoretical_loss": 4.236746195324523, "tokens_seen": 264765440 }, { "epoch": 0.08, "learning_rate": 0.0009289841116995667, "loss": 0.1002, "theoretical_loss": 4.2362476724151215, "tokens_seen": 265027584 }, { "epoch": 0.08, "learning_rate": 0.0009289038677579843, "loss": 0.0973, "theoretical_loss": 4.235749780272998, "tokens_seen": 265289728 }, { "epoch": 0.08, "learning_rate": 0.0009288236238164018, "loss": 0.1, "theoretical_loss": 4.235252517477956, "tokens_seen": 265551872 }, { "epoch": 0.08, "learning_rate": 0.0009287433798748195, "loss": 0.0943, "theoretical_loss": 4.23475588261439, "tokens_seen": 265814016 }, { "epoch": 0.08, "learning_rate": 0.000928663135933237, "loss": 0.0966, "theoretical_loss": 4.234259874271275, "tokens_seen": 266076160 }, { "epoch": 0.08, "learning_rate": 0.0009285828919916546, "loss": 0.0969, "theoretical_loss": 4.23376449104214, "tokens_seen": 266338304 }, { "epoch": 0.08, "learning_rate": 0.0009285026480500723, "loss": 0.1002, "theoretical_loss": 4.233269731525055, "tokens_seen": 266600448 }, { "epoch": 0.08, "learning_rate": 0.0009284224041084899, "loss": 0.0978, "theoretical_loss": 4.232775594322605, "tokens_seen": 266862592 }, { "epoch": 0.08, "learning_rate": 0.0009283421601669074, "loss": 0.0976, "theoretical_loss": 4.232282078041876, "tokens_seen": 267124736 }, { "epoch": 0.08, "learning_rate": 0.000928261916225325, "loss": 0.0978, "theoretical_loss": 4.231789181294436, "tokens_seen": 267386880 }, { "epoch": 0.08, "learning_rate": 0.0009281816722837426, "loss": 0.1, "theoretical_loss": 4.231296902696314, "tokens_seen": 267649024 }, { "epoch": 0.08, "learning_rate": 0.0009281014283421601, "loss": 0.0956, "theoretical_loss": 4.230805240867982, "tokens_seen": 267911168 }, { "epoch": 0.08, "learning_rate": 0.0009280211844005778, "loss": 0.097, "theoretical_loss": 4.230314194434336, "tokens_seen": 268173312 }, { "epoch": 0.08, "learning_rate": 0.0009279409404589953, "loss": 0.0974, "theoretical_loss": 4.229823762024681, "tokens_seen": 268435456 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0018670569406822324, "objective/train/docs_used": 104173, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8005046844482422, "objective/train/original_loss": 1.8005049228668213, "objective/train/theoretical_loss": 4.2293339422727065, "objective/train/tokens_used": 289157600, "objective/train/value_avg": -0.00882720947265625, "objective/train/value_loss": 0.0002679454628378153, "objective/train/value_max": -0.00035691261291503906, "objective/train/value_min": -0.283203125, "objective/train/value_reward_corr": 0.5715762562194157, "objective/train/value_std": 0.01021575927734375, "objective/train/weight_avg": 1.0019898414611816, "objective/train/weighted_lm_loss": 1.803560495376587, "objective/train/weights_max": 1.1879379749298096, "objective/train/weights_min": 0.3708355128765106, "theoretical_loss": 4.2293339422727065, "tokens_seen": 268697600 }, { "epoch": 0.08, "learning_rate": 0.000927860696517413, "loss": 0.0972, "theoretical_loss": 4.2293339422727065, "tokens_seen": 268697600 }, { "epoch": 0.08, "learning_rate": 0.0009277804525758306, "loss": 0.0986, "theoretical_loss": 4.228844733816474, "tokens_seen": 268959744 }, { "epoch": 0.08, "learning_rate": 0.0009277002086342481, "loss": 0.0948, "theoretical_loss": 4.228356135298394, "tokens_seen": 269221888 }, { "epoch": 0.08, "learning_rate": 0.0009276199646926658, "loss": 0.0938, "theoretical_loss": 4.227868145365211, "tokens_seen": 269484032 }, { "epoch": 0.08, "learning_rate": 0.0009275397207510833, "loss": 0.0966, "theoretical_loss": 4.227380762667987, "tokens_seen": 269746176 }, { "epoch": 0.08, "learning_rate": 0.0009274594768095009, "loss": 0.0951, "theoretical_loss": 4.226893985862076, "tokens_seen": 270008320 }, { "epoch": 0.08, "learning_rate": 0.0009273792328679185, "loss": 0.0985, "theoretical_loss": 4.226407813607116, "tokens_seen": 270270464 }, { "epoch": 0.08, "learning_rate": 0.0009272989889263361, "loss": 0.0967, "theoretical_loss": 4.2259222445670055, "tokens_seen": 270532608 }, { "epoch": 0.08, "learning_rate": 0.0009272187449847536, "loss": 0.0935, "theoretical_loss": 4.225437277409885, "tokens_seen": 270794752 }, { "epoch": 0.08, "learning_rate": 0.0009271385010431712, "loss": 0.0953, "theoretical_loss": 4.224952910808122, "tokens_seen": 271056896 }, { "epoch": 0.08, "learning_rate": 0.0009270582571015889, "loss": 0.0973, "theoretical_loss": 4.224469143438294, "tokens_seen": 271319040 }, { "epoch": 0.08, "learning_rate": 0.0009269780131600064, "loss": 0.0998, "theoretical_loss": 4.223985973981171, "tokens_seen": 271581184 }, { "epoch": 0.08, "learning_rate": 0.0009268977692184241, "loss": 0.0996, "theoretical_loss": 4.223503401121693, "tokens_seen": 271843328 }, { "epoch": 0.08, "learning_rate": 0.0009268175252768416, "loss": 0.0941, "theoretical_loss": 4.223021423548962, "tokens_seen": 272105472 }, { "epoch": 0.08, "learning_rate": 0.0009267372813352593, "loss": 0.0962, "theoretical_loss": 4.222540039956215, "tokens_seen": 272367616 }, { "epoch": 0.08, "learning_rate": 0.0009266570373936768, "loss": 0.0953, "theoretical_loss": 4.222059249040814, "tokens_seen": 272629760 }, { "epoch": 0.08, "learning_rate": 0.0009265767934520943, "loss": 0.0989, "theoretical_loss": 4.2215790495042285, "tokens_seen": 272891904 }, { "epoch": 0.08, "learning_rate": 0.000926496549510512, "loss": 0.0982, "theoretical_loss": 4.221099440052014, "tokens_seen": 273154048 }, { "epoch": 0.08, "learning_rate": 0.0009264163055689295, "loss": 0.0956, "theoretical_loss": 4.220620419393799, "tokens_seen": 273416192 }, { "epoch": 0.08, "learning_rate": 0.0009263360616273471, "loss": 0.0979, "theoretical_loss": 4.220141986243268, "tokens_seen": 273678336 }, { "epoch": 0.08, "learning_rate": 0.0009262558176857648, "loss": 0.098, "theoretical_loss": 4.219664139318145, "tokens_seen": 273940480 }, { "epoch": 0.08, "learning_rate": 0.0009261755737441824, "loss": 0.0972, "theoretical_loss": 4.219186877340174, "tokens_seen": 274202624 }, { "epoch": 0.08, "learning_rate": 0.0009260953298025999, "loss": 0.0943, "theoretical_loss": 4.218710199035108, "tokens_seen": 274464768 }, { "epoch": 0.08, "learning_rate": 0.0009260150858610175, "loss": 0.0941, "theoretical_loss": 4.218234103132686, "tokens_seen": 274726912 }, { "epoch": 0.08, "learning_rate": 0.0009259348419194351, "loss": 0.097, "theoretical_loss": 4.217758588366623, "tokens_seen": 274989056 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0027189042884856462, "objective/train/docs_used": 106703, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8442187309265137, "objective/train/original_loss": 1.8442187309265137, "objective/train/theoretical_loss": 4.217283653474588, "objective/train/tokens_used": 295711200, "objective/train/value_avg": -0.011016845703125, "objective/train/value_loss": 0.0005074041546322405, "objective/train/value_max": -0.0006022453308105469, "objective/train/value_min": -0.90625, "objective/train/value_reward_corr": 0.6262922111240474, "objective/train/value_std": 0.0198211669921875, "objective/train/weight_avg": 1.0029503107070923, "objective/train/weighted_lm_loss": 1.849584937095642, "objective/train/weights_max": 1.8512089252471924, "objective/train/weights_min": 0.36976757645606995, "theoretical_loss": 4.217283653474588, "tokens_seen": 275251200 }, { "epoch": 0.08, "learning_rate": 0.0009258545979778526, "loss": 0.0936, "theoretical_loss": 4.217283653474588, "tokens_seen": 275251200 }, { "epoch": 0.08, "learning_rate": 0.0009257743540362703, "loss": 0.0962, "theoretical_loss": 4.216809297198195, "tokens_seen": 275513344 }, { "epoch": 0.08, "learning_rate": 0.0009256941100946878, "loss": 0.0968, "theoretical_loss": 4.21633551828298, "tokens_seen": 275775488 }, { "epoch": 0.08, "learning_rate": 0.0009256138661531054, "loss": 0.0981, "theoretical_loss": 4.215862315478388, "tokens_seen": 276037632 }, { "epoch": 0.08, "learning_rate": 0.0009255336222115231, "loss": 0.0992, "theoretical_loss": 4.2153896875377574, "tokens_seen": 276299776 }, { "epoch": 0.08, "learning_rate": 0.0009254533782699407, "loss": 0.0979, "theoretical_loss": 4.214917633218304, "tokens_seen": 276561920 }, { "epoch": 0.08, "learning_rate": 0.0009253731343283583, "loss": 0.098, "theoretical_loss": 4.214446151281106, "tokens_seen": 276824064 }, { "epoch": 0.08, "learning_rate": 0.0009252928903867758, "loss": 0.0945, "theoretical_loss": 4.213975240491084, "tokens_seen": 277086208 }, { "epoch": 0.08, "learning_rate": 0.0009252126464451934, "loss": 0.0964, "theoretical_loss": 4.213504899616995, "tokens_seen": 277348352 }, { "epoch": 0.08, "learning_rate": 0.000925132402503611, "loss": 0.0969, "theoretical_loss": 4.213035127431402, "tokens_seen": 277610496 }, { "epoch": 0.08, "learning_rate": 0.0009250521585620286, "loss": 0.0967, "theoretical_loss": 4.212565922710677, "tokens_seen": 277872640 }, { "epoch": 0.08, "learning_rate": 0.0009249719146204461, "loss": 0.0964, "theoretical_loss": 4.21209728423497, "tokens_seen": 278134784 }, { "epoch": 0.08, "learning_rate": 0.0009248916706788639, "loss": 0.0984, "theoretical_loss": 4.2116292107882, "tokens_seen": 278396928 }, { "epoch": 0.08, "learning_rate": 0.0009248114267372814, "loss": 0.0973, "theoretical_loss": 4.211161701158042, "tokens_seen": 278659072 }, { "epoch": 0.08, "learning_rate": 0.0009247311827956989, "loss": 0.0985, "theoretical_loss": 4.2106947541359085, "tokens_seen": 278921216 }, { "epoch": 0.08, "learning_rate": 0.0009246509388541166, "loss": 0.0962, "theoretical_loss": 4.210228368516935, "tokens_seen": 279183360 }, { "epoch": 0.08, "learning_rate": 0.0009245706949125341, "loss": 0.1018, "theoretical_loss": 4.209762543099966, "tokens_seen": 279445504 }, { "epoch": 0.08, "learning_rate": 0.0009244904509709517, "loss": 0.0971, "theoretical_loss": 4.209297276687541, "tokens_seen": 279707648 }, { "epoch": 0.08, "learning_rate": 0.0009244102070293693, "loss": 0.1, "theoretical_loss": 4.2088325680858745, "tokens_seen": 279969792 }, { "epoch": 0.08, "learning_rate": 0.0009243299630877869, "loss": 0.0954, "theoretical_loss": 4.208368416104849, "tokens_seen": 280231936 }, { "epoch": 0.09, "learning_rate": 0.0009242497191462045, "loss": 0.0986, "theoretical_loss": 4.207904819557995, "tokens_seen": 280494080 }, { "epoch": 0.09, "learning_rate": 0.000924169475204622, "loss": 0.0965, "theoretical_loss": 4.207441777262477, "tokens_seen": 280756224 }, { "epoch": 0.09, "learning_rate": 0.0009240892312630397, "loss": 0.0958, "theoretical_loss": 4.206979288039081, "tokens_seen": 281018368 }, { "epoch": 0.09, "learning_rate": 0.0009240089873214573, "loss": 0.0943, "theoretical_loss": 4.206517350712199, "tokens_seen": 281280512 }, { "epoch": 0.09, "learning_rate": 0.0009239287433798749, "loss": 0.0964, "theoretical_loss": 4.206055964109813, "tokens_seen": 281542656 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.0017802802613005042, "objective/train/docs_used": 109222, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.907019853591919, "objective/train/original_loss": 1.907020092010498, "objective/train/theoretical_loss": 4.205595127063485, "objective/train/tokens_used": 302264800, "objective/train/value_avg": -0.007091522216796875, "objective/train/value_loss": 0.00020236380805727094, "objective/train/value_max": -0.00042891502380371094, "objective/train/value_min": -0.2548828125, "objective/train/value_reward_corr": 0.46679314179268283, "objective/train/value_std": 0.0074310302734375, "objective/train/weight_avg": 1.0018771886825562, "objective/train/weighted_lm_loss": 1.9096391201019287, "objective/train/weights_max": 1.1684050559997559, "objective/train/weights_min": 0.38661831617355347, "theoretical_loss": 4.205595127063485, "tokens_seen": 281804800 }, { "epoch": 0.09, "learning_rate": 0.0009238484994382924, "loss": 0.0951, "theoretical_loss": 4.205595127063485, "tokens_seen": 281804800 }, { "epoch": 0.09, "learning_rate": 0.0009237682554967101, "loss": 0.0938, "theoretical_loss": 4.205134838408337, "tokens_seen": 282066944 }, { "epoch": 0.09, "learning_rate": 0.0009236880115551276, "loss": 0.0959, "theoretical_loss": 4.20467509698304, "tokens_seen": 282329088 }, { "epoch": 0.09, "learning_rate": 0.0009236077676135451, "loss": 0.0954, "theoretical_loss": 4.204215901629803, "tokens_seen": 282591232 }, { "epoch": 0.09, "learning_rate": 0.0009235275236719628, "loss": 0.0962, "theoretical_loss": 4.203757251194353, "tokens_seen": 282853376 }, { "epoch": 0.09, "learning_rate": 0.0009234472797303803, "loss": 0.0952, "theoretical_loss": 4.203299144525923, "tokens_seen": 283115520 }, { "epoch": 0.09, "learning_rate": 0.000923367035788798, "loss": 0.096, "theoretical_loss": 4.202841580477241, "tokens_seen": 283377664 }, { "epoch": 0.09, "learning_rate": 0.0009232867918472156, "loss": 0.0989, "theoretical_loss": 4.202384557904513, "tokens_seen": 283639808 }, { "epoch": 0.09, "learning_rate": 0.0009232065479056332, "loss": 0.0955, "theoretical_loss": 4.201928075667411, "tokens_seen": 283901952 }, { "epoch": 0.09, "learning_rate": 0.0009231263039640507, "loss": 0.0949, "theoretical_loss": 4.201472132629057, "tokens_seen": 284164096 }, { "epoch": 0.09, "learning_rate": 0.0009230460600224683, "loss": 0.1007, "theoretical_loss": 4.201016727656012, "tokens_seen": 284426240 }, { "epoch": 0.09, "learning_rate": 0.0009229658160808859, "loss": 0.0983, "theoretical_loss": 4.2005618596182615, "tokens_seen": 284688384 }, { "epoch": 0.09, "learning_rate": 0.0009228855721393035, "loss": 0.0985, "theoretical_loss": 4.200107527389202, "tokens_seen": 284950528 }, { "epoch": 0.09, "learning_rate": 0.0009228053281977211, "loss": 0.0965, "theoretical_loss": 4.199653729845626, "tokens_seen": 285212672 }, { "epoch": 0.09, "learning_rate": 0.0009227250842561386, "loss": 0.0965, "theoretical_loss": 4.199200465867714, "tokens_seen": 285474816 }, { "epoch": 0.09, "learning_rate": 0.0009226448403145564, "loss": 0.0955, "theoretical_loss": 4.198747734339013, "tokens_seen": 285736960 }, { "epoch": 0.09, "learning_rate": 0.0009225645963729739, "loss": 0.0947, "theoretical_loss": 4.198295534146429, "tokens_seen": 285999104 }, { "epoch": 0.09, "learning_rate": 0.0009224843524313914, "loss": 0.095, "theoretical_loss": 4.197843864180214, "tokens_seen": 286261248 }, { "epoch": 0.09, "learning_rate": 0.0009224041084898091, "loss": 0.0949, "theoretical_loss": 4.197392723333951, "tokens_seen": 286523392 }, { "epoch": 0.09, "learning_rate": 0.0009223238645482266, "loss": 0.0975, "theoretical_loss": 4.196942110504538, "tokens_seen": 286785536 }, { "epoch": 0.09, "learning_rate": 0.0009222436206066442, "loss": 0.0954, "theoretical_loss": 4.196492024592183, "tokens_seen": 287047680 }, { "epoch": 0.09, "learning_rate": 0.0009221633766650618, "loss": 0.095, "theoretical_loss": 4.196042464500382, "tokens_seen": 287309824 }, { "epoch": 0.09, "learning_rate": 0.0009220831327234794, "loss": 0.0947, "theoretical_loss": 4.195593429135916, "tokens_seen": 287571968 }, { "epoch": 0.09, "learning_rate": 0.0009220028887818969, "loss": 0.1014, "theoretical_loss": 4.195144917408828, "tokens_seen": 287834112 }, { "epoch": 0.09, "learning_rate": 0.0009219226448403146, "loss": 0.0949, "theoretical_loss": 4.194696928232417, "tokens_seen": 288096256 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.002935412572696805, "objective/train/docs_used": 111591, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8163905143737793, "objective/train/original_loss": 1.8163906335830688, "objective/train/theoretical_loss": 4.194249460523222, "objective/train/tokens_used": 308818400, "objective/train/value_avg": -0.00843048095703125, "objective/train/value_loss": 0.0001841627963585779, "objective/train/value_max": -0.000507354736328125, "objective/train/value_min": -0.318603515625, "objective/train/value_reward_corr": 0.49277345208210327, "objective/train/value_std": 0.0080413818359375, "objective/train/weight_avg": 1.0030207633972168, "objective/train/weighted_lm_loss": 1.8225624561309814, "objective/train/weights_max": 1.174675464630127, "objective/train/weights_min": 0.3693629503250122, "theoretical_loss": 4.194249460523222, "tokens_seen": 288358400 }, { "epoch": 0.09, "learning_rate": 0.0009218424008987322, "loss": 0.0934, "theoretical_loss": 4.194249460523222, "tokens_seen": 288358400 }, { "epoch": 0.09, "learning_rate": 0.0009217621569571497, "loss": 0.0965, "theoretical_loss": 4.193802513201015, "tokens_seen": 288620544 }, { "epoch": 0.09, "learning_rate": 0.0009216819130155674, "loss": 0.0999, "theoretical_loss": 4.193356085188778, "tokens_seen": 288882688 }, { "epoch": 0.09, "learning_rate": 0.0009216016690739849, "loss": 0.0984, "theoretical_loss": 4.1929101754127025, "tokens_seen": 289144832 }, { "epoch": 0.09, "learning_rate": 0.0009215214251324026, "loss": 0.0957, "theoretical_loss": 4.192464782802167, "tokens_seen": 289406976 }, { "epoch": 0.09, "learning_rate": 0.0009214411811908201, "loss": 0.0957, "theoretical_loss": 4.192019906289733, "tokens_seen": 289669120 }, { "epoch": 0.09, "learning_rate": 0.0009213609372492377, "loss": 0.0985, "theoretical_loss": 4.1915755448111245, "tokens_seen": 289931264 }, { "epoch": 0.09, "learning_rate": 0.0009212806933076553, "loss": 0.0972, "theoretical_loss": 4.191131697305222, "tokens_seen": 290193408 }, { "epoch": 0.09, "learning_rate": 0.0009212004493660728, "loss": 0.0962, "theoretical_loss": 4.1906883627140505, "tokens_seen": 290455552 }, { "epoch": 0.09, "learning_rate": 0.0009211202054244905, "loss": 0.0931, "theoretical_loss": 4.19024553998276, "tokens_seen": 290717696 }, { "epoch": 0.09, "learning_rate": 0.0009210399614829081, "loss": 0.0957, "theoretical_loss": 4.189803228059623, "tokens_seen": 290979840 }, { "epoch": 0.09, "learning_rate": 0.0009209597175413257, "loss": 0.096, "theoretical_loss": 4.189361425896016, "tokens_seen": 291241984 }, { "epoch": 0.09, "learning_rate": 0.0009208794735997432, "loss": 0.0929, "theoretical_loss": 4.188920132446411, "tokens_seen": 291504128 }, { "epoch": 0.09, "learning_rate": 0.0009207992296581609, "loss": 0.0937, "theoretical_loss": 4.188479346668359, "tokens_seen": 291766272 }, { "epoch": 0.09, "learning_rate": 0.0009207189857165784, "loss": 0.0941, "theoretical_loss": 4.188039067522484, "tokens_seen": 292028416 }, { "epoch": 0.09, "learning_rate": 0.0009206387417749959, "loss": 0.0953, "theoretical_loss": 4.18759929397247, "tokens_seen": 292290560 }, { "epoch": 0.09, "learning_rate": 0.0009205584978334136, "loss": 0.0958, "theoretical_loss": 4.187160024985044, "tokens_seen": 292552704 }, { "epoch": 0.09, "learning_rate": 0.0009204782538918311, "loss": 0.0973, "theoretical_loss": 4.1867212595299685, "tokens_seen": 292814848 }, { "epoch": 0.09, "learning_rate": 0.0009203980099502489, "loss": 0.0958, "theoretical_loss": 4.186282996580034, "tokens_seen": 293076992 }, { "epoch": 0.09, "learning_rate": 0.0009203177660086664, "loss": 0.0983, "theoretical_loss": 4.185845235111037, "tokens_seen": 293339136 }, { "epoch": 0.09, "learning_rate": 0.000920237522067084, "loss": 0.0962, "theoretical_loss": 4.185407974101779, "tokens_seen": 293601280 }, { "epoch": 0.09, "learning_rate": 0.0009201572781255016, "loss": 0.0948, "theoretical_loss": 4.184971212534048, "tokens_seen": 293863424 }, { "epoch": 0.09, "learning_rate": 0.0009200770341839191, "loss": 0.0959, "theoretical_loss": 4.184534949392611, "tokens_seen": 294125568 }, { "epoch": 0.09, "learning_rate": 0.0009199967902423367, "loss": 0.095, "theoretical_loss": 4.184099183665199, "tokens_seen": 294387712 }, { "epoch": 0.09, "learning_rate": 0.0009199165463007543, "loss": 0.0982, "theoretical_loss": 4.1836639143425, "tokens_seen": 294649856 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.001675541396252811, "objective/train/docs_used": 114046, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.922358751296997, "objective/train/original_loss": 1.922358751296997, "objective/train/theoretical_loss": 4.1832291404181445, "objective/train/tokens_used": 315372000, "objective/train/value_avg": -0.00826263427734375, "objective/train/value_loss": 0.0002560181892476976, "objective/train/value_max": -0.00048041343688964844, "objective/train/value_min": -0.51025390625, "objective/train/value_reward_corr": 0.5477898481987934, "objective/train/value_std": 0.008514404296875, "objective/train/weight_avg": 1.001792550086975, "objective/train/weighted_lm_loss": 1.9252777099609375, "objective/train/weights_max": 1.2260377407073975, "objective/train/weights_min": 0.37242892384529114, "theoretical_loss": 4.1832291404181445, "tokens_seen": 294912000 }, { "epoch": 0.09, "learning_rate": 0.0009198363023591719, "loss": 0.0955, "theoretical_loss": 4.1832291404181445, "tokens_seen": 294912000 }, { "epoch": 0.09, "learning_rate": 0.0009197560584175894, "loss": 0.0958, "theoretical_loss": 4.182794860888696, "tokens_seen": 295174144 }, { "epoch": 0.09, "learning_rate": 0.0009196758144760072, "loss": 0.0975, "theoretical_loss": 4.18236107475364, "tokens_seen": 295436288 }, { "epoch": 0.09, "learning_rate": 0.0009195955705344247, "loss": 0.0973, "theoretical_loss": 4.18192778101537, "tokens_seen": 295698432 }, { "epoch": 0.09, "learning_rate": 0.0009195153265928422, "loss": 0.0942, "theoretical_loss": 4.181494978679181, "tokens_seen": 295960576 }, { "epoch": 0.09, "learning_rate": 0.0009194350826512599, "loss": 0.0951, "theoretical_loss": 4.181062666753256, "tokens_seen": 296222720 }, { "epoch": 0.09, "learning_rate": 0.0009193548387096774, "loss": 0.0946, "theoretical_loss": 4.180630844248653, "tokens_seen": 296484864 }, { "epoch": 0.09, "learning_rate": 0.000919274594768095, "loss": 0.0976, "theoretical_loss": 4.180199510179299, "tokens_seen": 296747008 }, { "epoch": 0.09, "learning_rate": 0.0009191943508265126, "loss": 0.0947, "theoretical_loss": 4.179768663561975, "tokens_seen": 297009152 }, { "epoch": 0.09, "learning_rate": 0.0009191141068849302, "loss": 0.0924, "theoretical_loss": 4.1793383034163085, "tokens_seen": 297271296 }, { "epoch": 0.09, "learning_rate": 0.0009190338629433478, "loss": 0.0939, "theoretical_loss": 4.178908428764759, "tokens_seen": 297533440 }, { "epoch": 0.09, "learning_rate": 0.0009189536190017654, "loss": 0.0945, "theoretical_loss": 4.17847903863261, "tokens_seen": 297795584 }, { "epoch": 0.09, "learning_rate": 0.000918873375060183, "loss": 0.095, "theoretical_loss": 4.178050132047958, "tokens_seen": 298057728 }, { "epoch": 0.09, "learning_rate": 0.0009187931311186006, "loss": 0.0937, "theoretical_loss": 4.177621708041703, "tokens_seen": 298319872 }, { "epoch": 0.09, "learning_rate": 0.0009187128871770182, "loss": 0.0964, "theoretical_loss": 4.177193765647534, "tokens_seen": 298582016 }, { "epoch": 0.09, "learning_rate": 0.0009186326432354357, "loss": 0.0973, "theoretical_loss": 4.176766303901922, "tokens_seen": 298844160 }, { "epoch": 0.09, "learning_rate": 0.0009185523992938534, "loss": 0.0954, "theoretical_loss": 4.17633932184411, "tokens_seen": 299106304 }, { "epoch": 0.09, "learning_rate": 0.0009184721553522709, "loss": 0.0951, "theoretical_loss": 4.1759128185161005, "tokens_seen": 299368448 }, { "epoch": 0.09, "learning_rate": 0.0009183919114106885, "loss": 0.0917, "theoretical_loss": 4.175486792962646, "tokens_seen": 299630592 }, { "epoch": 0.09, "learning_rate": 0.0009183116674691061, "loss": 0.0952, "theoretical_loss": 4.175061244231237, "tokens_seen": 299892736 }, { "epoch": 0.09, "learning_rate": 0.0009182314235275236, "loss": 0.097, "theoretical_loss": 4.174636171372097, "tokens_seen": 300154880 }, { "epoch": 0.09, "learning_rate": 0.0009181511795859412, "loss": 0.0914, "theoretical_loss": 4.174211573438166, "tokens_seen": 300417024 }, { "epoch": 0.09, "learning_rate": 0.0009180709356443589, "loss": 0.0931, "theoretical_loss": 4.173787449485094, "tokens_seen": 300679168 }, { "epoch": 0.09, "learning_rate": 0.0009179906917027765, "loss": 0.0979, "theoretical_loss": 4.17336379857123, "tokens_seen": 300941312 }, { "epoch": 0.09, "learning_rate": 0.0009179104477611941, "loss": 0.0941, "theoretical_loss": 4.172940619757611, "tokens_seen": 301203456 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.002791388425976038, "objective/train/docs_used": 116418, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.9814350605010986, "objective/train/original_loss": 1.9814350605010986, "objective/train/theoretical_loss": 4.172517912107954, "objective/train/tokens_used": 321925600, "objective/train/value_avg": -0.01168060302734375, "objective/train/value_loss": 0.0004382512124720961, "objective/train/value_max": -0.0003905296325683594, "objective/train/value_min": -0.64453125, "objective/train/value_reward_corr": 0.6855180405018573, "objective/train/value_std": 0.0270538330078125, "objective/train/weight_avg": 1.0029999017715454, "objective/train/weighted_lm_loss": 1.9875683784484863, "objective/train/weights_max": 1.4244129657745361, "objective/train/weights_min": 0.3713734447956085, "theoretical_loss": 4.172517912107954, "tokens_seen": 301465600 }, { "epoch": 0.09, "learning_rate": 0.0009178302038196117, "loss": 0.0932, "theoretical_loss": 4.172517912107954, "tokens_seen": 301465600 }, { "epoch": 0.09, "learning_rate": 0.0009177499598780292, "loss": 0.0923, "theoretical_loss": 4.172095674688645, "tokens_seen": 301727744 }, { "epoch": 0.09, "learning_rate": 0.0009176697159364468, "loss": 0.0951, "theoretical_loss": 4.171673906568729, "tokens_seen": 301989888 }, { "epoch": 0.09, "learning_rate": 0.0009175894719948644, "loss": 0.0967, "theoretical_loss": 4.171252606819899, "tokens_seen": 302252032 }, { "epoch": 0.09, "learning_rate": 0.0009175092280532819, "loss": 0.0948, "theoretical_loss": 4.170831774516489, "tokens_seen": 302514176 }, { "epoch": 0.09, "learning_rate": 0.0009174289841116997, "loss": 0.0958, "theoretical_loss": 4.170411408735461, "tokens_seen": 302776320 }, { "epoch": 0.09, "learning_rate": 0.0009173487401701172, "loss": 0.0958, "theoretical_loss": 4.169991508556398, "tokens_seen": 303038464 }, { "epoch": 0.09, "learning_rate": 0.0009172684962285348, "loss": 0.0944, "theoretical_loss": 4.169572073061493, "tokens_seen": 303300608 }, { "epoch": 0.09, "learning_rate": 0.0009171882522869524, "loss": 0.0977, "theoretical_loss": 4.16915310133554, "tokens_seen": 303562752 }, { "epoch": 0.09, "learning_rate": 0.0009171080083453699, "loss": 0.0962, "theoretical_loss": 4.1687345924659205, "tokens_seen": 303824896 }, { "epoch": 0.09, "learning_rate": 0.0009170277644037875, "loss": 0.0937, "theoretical_loss": 4.168316545542602, "tokens_seen": 304087040 }, { "epoch": 0.09, "learning_rate": 0.0009169475204622051, "loss": 0.0935, "theoretical_loss": 4.167898959658121, "tokens_seen": 304349184 }, { "epoch": 0.09, "learning_rate": 0.0009168672765206227, "loss": 0.0903, "theoretical_loss": 4.167481833907576, "tokens_seen": 304611328 }, { "epoch": 0.09, "learning_rate": 0.0009167870325790402, "loss": 0.098, "theoretical_loss": 4.16706516738862, "tokens_seen": 304873472 }, { "epoch": 0.09, "learning_rate": 0.000916706788637458, "loss": 0.0937, "theoretical_loss": 4.166648959201449, "tokens_seen": 305135616 }, { "epoch": 0.09, "learning_rate": 0.0009166265446958755, "loss": 0.097, "theoretical_loss": 4.166233208448794, "tokens_seen": 305397760 }, { "epoch": 0.09, "learning_rate": 0.0009165463007542931, "loss": 0.0962, "theoretical_loss": 4.165817914235908, "tokens_seen": 305659904 }, { "epoch": 0.09, "learning_rate": 0.0009164660568127107, "loss": 0.0944, "theoretical_loss": 4.165403075670562, "tokens_seen": 305922048 }, { "epoch": 0.09, "learning_rate": 0.0009163858128711282, "loss": 0.0938, "theoretical_loss": 4.164988691863032, "tokens_seen": 306184192 }, { "epoch": 0.09, "learning_rate": 0.0009163055689295459, "loss": 0.0937, "theoretical_loss": 4.164574761926092, "tokens_seen": 306446336 }, { "epoch": 0.09, "learning_rate": 0.0009162253249879634, "loss": 0.0943, "theoretical_loss": 4.164161284975005, "tokens_seen": 306708480 }, { "epoch": 0.09, "learning_rate": 0.000916145081046381, "loss": 0.0938, "theoretical_loss": 4.1637482601275115, "tokens_seen": 306970624 }, { "epoch": 0.09, "learning_rate": 0.0009160648371047986, "loss": 0.0979, "theoretical_loss": 4.163335686503822, "tokens_seen": 307232768 }, { "epoch": 0.09, "learning_rate": 0.0009159845931632162, "loss": 0.0956, "theoretical_loss": 4.162923563226607, "tokens_seen": 307494912 }, { "epoch": 0.09, "learning_rate": 0.0009159043492216338, "loss": 0.093, "theoretical_loss": 4.1625118894209905, "tokens_seen": 307757056 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.0017242436297237873, "objective/train/docs_used": 118729, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8616645336151123, "objective/train/original_loss": 1.8616645336151123, "objective/train/theoretical_loss": 4.16210066421454, "objective/train/tokens_used": 328479200, "objective/train/value_avg": -0.00921630859375, "objective/train/value_loss": 0.0002815025800373405, "objective/train/value_max": -0.0003936290740966797, "objective/train/value_min": -0.79638671875, "objective/train/value_reward_corr": 0.6599847619871043, "objective/train/value_std": 0.0160675048828125, "objective/train/weight_avg": 1.0018616914749146, "objective/train/weighted_lm_loss": 1.86510169506073, "objective/train/weights_max": 2.101844549179077, "objective/train/weights_min": 0.36873507499694824, "theoretical_loss": 4.16210066421454, "tokens_seen": 308019200 }, { "epoch": 0.09, "learning_rate": 0.0009158241052800514, "loss": 0.0932, "theoretical_loss": 4.16210066421454, "tokens_seen": 308019200 }, { "epoch": 0.09, "learning_rate": 0.000915743861338469, "loss": 0.0945, "theoretical_loss": 4.161689886737255, "tokens_seen": 308281344 }, { "epoch": 0.09, "learning_rate": 0.0009156636173968865, "loss": 0.0963, "theoretical_loss": 4.161279556121562, "tokens_seen": 308543488 }, { "epoch": 0.09, "learning_rate": 0.0009155833734553042, "loss": 0.0952, "theoretical_loss": 4.160869671502302, "tokens_seen": 308805632 }, { "epoch": 0.09, "learning_rate": 0.0009155031295137217, "loss": 0.0959, "theoretical_loss": 4.160460232016725, "tokens_seen": 309067776 }, { "epoch": 0.09, "learning_rate": 0.0009154228855721394, "loss": 0.0964, "theoretical_loss": 4.16005123680448, "tokens_seen": 309329920 }, { "epoch": 0.09, "learning_rate": 0.0009153426416305569, "loss": 0.0931, "theoretical_loss": 4.159642685007606, "tokens_seen": 309592064 }, { "epoch": 0.09, "learning_rate": 0.0009152623976889744, "loss": 0.0924, "theoretical_loss": 4.1592345757705225, "tokens_seen": 309854208 }, { "epoch": 0.09, "learning_rate": 0.0009151821537473922, "loss": 0.0917, "theoretical_loss": 4.158826908240022, "tokens_seen": 310116352 }, { "epoch": 0.09, "learning_rate": 0.0009151019098058097, "loss": 0.0928, "theoretical_loss": 4.158419681565265, "tokens_seen": 310378496 }, { "epoch": 0.09, "learning_rate": 0.0009150216658642273, "loss": 0.0943, "theoretical_loss": 4.1580128948977615, "tokens_seen": 310640640 }, { "epoch": 0.09, "learning_rate": 0.0009149414219226449, "loss": 0.0938, "theoretical_loss": 4.157606547391374, "tokens_seen": 310902784 }, { "epoch": 0.09, "learning_rate": 0.0009148611779810625, "loss": 0.0973, "theoretical_loss": 4.157200638202301, "tokens_seen": 311164928 }, { "epoch": 0.09, "learning_rate": 0.00091478093403948, "loss": 0.0901, "theoretical_loss": 4.156795166489074, "tokens_seen": 311427072 }, { "epoch": 0.09, "learning_rate": 0.0009147006900978976, "loss": 0.0948, "theoretical_loss": 4.156390131412543, "tokens_seen": 311689216 }, { "epoch": 0.09, "learning_rate": 0.0009146204461563152, "loss": 0.0936, "theoretical_loss": 4.155985532135875, "tokens_seen": 311951360 }, { "epoch": 0.09, "learning_rate": 0.0009145402022147327, "loss": 0.0966, "theoretical_loss": 4.1555813678245395, "tokens_seen": 312213504 }, { "epoch": 0.09, "learning_rate": 0.0009144599582731505, "loss": 0.0935, "theoretical_loss": 4.155177637646306, "tokens_seen": 312475648 }, { "epoch": 0.09, "learning_rate": 0.000914379714331568, "loss": 0.0914, "theoretical_loss": 4.154774340771228, "tokens_seen": 312737792 }, { "epoch": 0.09, "learning_rate": 0.0009142994703899856, "loss": 0.095, "theoretical_loss": 4.154371476371646, "tokens_seen": 312999936 }, { "epoch": 0.09, "learning_rate": 0.0009142192264484032, "loss": 0.0931, "theoretical_loss": 4.153969043622169, "tokens_seen": 313262080 }, { "epoch": 0.1, "learning_rate": 0.0009141389825068207, "loss": 0.0963, "theoretical_loss": 4.15356704169967, "tokens_seen": 313524224 }, { "epoch": 0.1, "learning_rate": 0.0009140587385652384, "loss": 0.0968, "theoretical_loss": 4.153165469783279, "tokens_seen": 313786368 }, { "epoch": 0.1, "learning_rate": 0.0009139784946236559, "loss": 0.0969, "theoretical_loss": 4.152764327054376, "tokens_seen": 314048512 }, { "epoch": 0.1, "learning_rate": 0.0009138982506820735, "loss": 0.0947, "theoretical_loss": 4.152363612696579, "tokens_seen": 314310656 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.0024639612529426813, "objective/train/docs_used": 121169, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8197975158691406, "objective/train/original_loss": 1.8197975158691406, "objective/train/theoretical_loss": 4.151963325895737, "objective/train/tokens_used": 335032800, "objective/train/value_avg": -0.0099029541015625, "objective/train/value_loss": 0.0005963764269836247, "objective/train/value_max": -0.0004029273986816406, "objective/train/value_min": -0.27783203125, "objective/train/value_reward_corr": 0.45480866686294624, "objective/train/value_std": 0.01166534423828125, "objective/train/weight_avg": 1.0026694536209106, "objective/train/weighted_lm_loss": 1.8254142999649048, "objective/train/weights_max": 1.2214922904968262, "objective/train/weights_min": 0.020853379741311073, "theoretical_loss": 4.151963325895737, "tokens_seen": 314572800 }, { "epoch": 0.1, "learning_rate": 0.0009138180067404911, "loss": 0.0933, "theoretical_loss": 4.151963325895737, "tokens_seen": 314572800 }, { "epoch": 0.1, "learning_rate": 0.0009137377627989088, "loss": 0.0944, "theoretical_loss": 4.151563465839927, "tokens_seen": 314834944 }, { "epoch": 0.1, "learning_rate": 0.0009136575188573263, "loss": 0.0949, "theoretical_loss": 4.151164031719437, "tokens_seen": 315097088 }, { "epoch": 0.1, "learning_rate": 0.0009135772749157439, "loss": 0.0956, "theoretical_loss": 4.15076502272677, "tokens_seen": 315359232 }, { "epoch": 0.1, "learning_rate": 0.0009134970309741615, "loss": 0.0934, "theoretical_loss": 4.150366438056622, "tokens_seen": 315621376 }, { "epoch": 0.1, "learning_rate": 0.000913416787032579, "loss": 0.092, "theoretical_loss": 4.149968276905888, "tokens_seen": 315883520 }, { "epoch": 0.1, "learning_rate": 0.0009133365430909967, "loss": 0.0986, "theoretical_loss": 4.149570538473644, "tokens_seen": 316145664 }, { "epoch": 0.1, "learning_rate": 0.0009132562991494142, "loss": 0.0909, "theoretical_loss": 4.149173221961146, "tokens_seen": 316407808 }, { "epoch": 0.1, "learning_rate": 0.0009131760552078318, "loss": 0.0936, "theoretical_loss": 4.1487763265718165, "tokens_seen": 316669952 }, { "epoch": 0.1, "learning_rate": 0.0009130958112662494, "loss": 0.094, "theoretical_loss": 4.148379851511241, "tokens_seen": 316932096 }, { "epoch": 0.1, "learning_rate": 0.0009130155673246669, "loss": 0.094, "theoretical_loss": 4.147983795987161, "tokens_seen": 317194240 }, { "epoch": 0.1, "learning_rate": 0.0009129353233830846, "loss": 0.0934, "theoretical_loss": 4.14758815920946, "tokens_seen": 317456384 }, { "epoch": 0.1, "learning_rate": 0.0009128550794415022, "loss": 0.0914, "theoretical_loss": 4.147192940390165, "tokens_seen": 317718528 }, { "epoch": 0.1, "learning_rate": 0.0009127748354999198, "loss": 0.0966, "theoretical_loss": 4.146798138743433, "tokens_seen": 317980672 }, { "epoch": 0.1, "learning_rate": 0.0009126945915583374, "loss": 0.09, "theoretical_loss": 4.146403753485544, "tokens_seen": 318242816 }, { "epoch": 0.1, "learning_rate": 0.000912614347616755, "loss": 0.0927, "theoretical_loss": 4.146009783834892, "tokens_seen": 318504960 }, { "epoch": 0.1, "learning_rate": 0.0009125341036751725, "loss": 0.095, "theoretical_loss": 4.145616229011987, "tokens_seen": 318767104 }, { "epoch": 0.1, "learning_rate": 0.0009124538597335902, "loss": 0.0958, "theoretical_loss": 4.145223088239432, "tokens_seen": 319029248 }, { "epoch": 0.1, "learning_rate": 0.0009123736157920077, "loss": 0.0922, "theoretical_loss": 4.14483036074193, "tokens_seen": 319291392 }, { "epoch": 0.1, "learning_rate": 0.0009122933718504252, "loss": 0.0958, "theoretical_loss": 4.14443804574627, "tokens_seen": 319553536 }, { "epoch": 0.1, "learning_rate": 0.000912213127908843, "loss": 0.0925, "theoretical_loss": 4.144046142481317, "tokens_seen": 319815680 }, { "epoch": 0.1, "learning_rate": 0.0009121328839672605, "loss": 0.0965, "theoretical_loss": 4.143654650178012, "tokens_seen": 320077824 }, { "epoch": 0.1, "learning_rate": 0.0009120526400256781, "loss": 0.0966, "theoretical_loss": 4.143263568069358, "tokens_seen": 320339968 }, { "epoch": 0.1, "learning_rate": 0.0009119723960840957, "loss": 0.0925, "theoretical_loss": 4.142872895390417, "tokens_seen": 320602112 }, { "epoch": 0.1, "learning_rate": 0.0009118921521425133, "loss": 0.0938, "theoretical_loss": 4.142482631378303, "tokens_seen": 320864256 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.00016702228458598256, "objective/train/docs_used": 123582, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8835115432739258, "objective/train/original_loss": 1.8835114240646362, "objective/train/theoretical_loss": 4.142092775272169, "objective/train/tokens_used": 341586400, "objective/train/value_avg": -0.009979248046875, "objective/train/value_loss": 0.000347665831213817, "objective/train/value_max": -0.0004355907440185547, "objective/train/value_min": -0.2398681640625, "objective/train/value_reward_corr": 0.6063551562843142, "objective/train/value_std": 0.012237548828125, "objective/train/weight_avg": 1.0003257989883423, "objective/train/weighted_lm_loss": 1.8840646743774414, "objective/train/weights_max": 1.1990368366241455, "objective/train/weights_min": 0.3737753629684448, "theoretical_loss": 4.142092775272169, "tokens_seen": 321126400 }, { "epoch": 0.1, "learning_rate": 0.0009118119082009308, "loss": 0.0913, "theoretical_loss": 4.142092775272169, "tokens_seen": 321126400 }, { "epoch": 0.1, "learning_rate": 0.0009117316642593484, "loss": 0.0893, "theoretical_loss": 4.141703326313209, "tokens_seen": 321388544 }, { "epoch": 0.1, "learning_rate": 0.000911651420317766, "loss": 0.0931, "theoretical_loss": 4.141314283744643, "tokens_seen": 321650688 }, { "epoch": 0.1, "learning_rate": 0.0009115711763761835, "loss": 0.0925, "theoretical_loss": 4.140925646811714, "tokens_seen": 321912832 }, { "epoch": 0.1, "learning_rate": 0.0009114909324346013, "loss": 0.0944, "theoretical_loss": 4.1405374147616785, "tokens_seen": 322174976 }, { "epoch": 0.1, "learning_rate": 0.0009114106884930188, "loss": 0.0928, "theoretical_loss": 4.140149586843803, "tokens_seen": 322437120 }, { "epoch": 0.1, "learning_rate": 0.0009113304445514365, "loss": 0.0937, "theoretical_loss": 4.139762162309355, "tokens_seen": 322699264 }, { "epoch": 0.1, "learning_rate": 0.000911250200609854, "loss": 0.0929, "theoretical_loss": 4.139375140411592, "tokens_seen": 322961408 }, { "epoch": 0.1, "learning_rate": 0.0009111699566682715, "loss": 0.091, "theoretical_loss": 4.138988520405764, "tokens_seen": 323223552 }, { "epoch": 0.1, "learning_rate": 0.0009110897127266892, "loss": 0.0919, "theoretical_loss": 4.138602301549097, "tokens_seen": 323485696 }, { "epoch": 0.1, "learning_rate": 0.0009110094687851067, "loss": 0.0917, "theoretical_loss": 4.138216483100795, "tokens_seen": 323747840 }, { "epoch": 0.1, "learning_rate": 0.0009109292248435243, "loss": 0.0897, "theoretical_loss": 4.137831064322021, "tokens_seen": 324009984 }, { "epoch": 0.1, "learning_rate": 0.0009108489809019419, "loss": 0.0901, "theoretical_loss": 4.1374460444759045, "tokens_seen": 324272128 }, { "epoch": 0.1, "learning_rate": 0.0009107687369603596, "loss": 0.092, "theoretical_loss": 4.137061422827525, "tokens_seen": 324534272 }, { "epoch": 0.1, "learning_rate": 0.0009106884930187771, "loss": 0.0937, "theoretical_loss": 4.136677198643908, "tokens_seen": 324796416 }, { "epoch": 0.1, "learning_rate": 0.0009106082490771947, "loss": 0.0912, "theoretical_loss": 4.13629337119402, "tokens_seen": 325058560 }, { "epoch": 0.1, "learning_rate": 0.0009105280051356123, "loss": 0.0919, "theoretical_loss": 4.135909939748757, "tokens_seen": 325320704 }, { "epoch": 0.1, "learning_rate": 0.0009104477611940298, "loss": 0.0915, "theoretical_loss": 4.135526903580946, "tokens_seen": 325582848 }, { "epoch": 0.1, "learning_rate": 0.0009103675172524475, "loss": 0.0933, "theoretical_loss": 4.135144261965327, "tokens_seen": 325844992 }, { "epoch": 0.1, "learning_rate": 0.000910287273310865, "loss": 0.0921, "theoretical_loss": 4.134762014178559, "tokens_seen": 326107136 }, { "epoch": 0.1, "learning_rate": 0.0009102070293692827, "loss": 0.092, "theoretical_loss": 4.134380159499204, "tokens_seen": 326369280 }, { "epoch": 0.1, "learning_rate": 0.0009101267854277002, "loss": 0.0928, "theoretical_loss": 4.1339986972077245, "tokens_seen": 326631424 }, { "epoch": 0.1, "learning_rate": 0.0009100465414861177, "loss": 0.0881, "theoretical_loss": 4.133617626586475, "tokens_seen": 326893568 }, { "epoch": 0.1, "learning_rate": 0.0009099662975445355, "loss": 0.0911, "theoretical_loss": 4.133236946919698, "tokens_seen": 327155712 }, { "epoch": 0.1, "learning_rate": 0.000909886053602953, "loss": 0.0969, "theoretical_loss": 4.132856657493516, "tokens_seen": 327417856 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.0012691118754446507, "objective/train/docs_used": 126013, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.943372130393982, "objective/train/original_loss": 1.9433720111846924, "objective/train/theoretical_loss": 4.132476757595925, "objective/train/tokens_used": 348140000, "objective/train/value_avg": -0.00861358642578125, "objective/train/value_loss": 0.0002645694767124951, "objective/train/value_max": -0.00031757354736328125, "objective/train/value_min": -0.280517578125, "objective/train/value_reward_corr": 0.6476616622246956, "objective/train/value_std": 0.01280975341796875, "objective/train/weight_avg": 1.0013933181762695, "objective/train/weighted_lm_loss": 1.9461336135864258, "objective/train/weights_max": 1.204296588897705, "objective/train/weights_min": 0.3697463870048523, "theoretical_loss": 4.132476757595925, "tokens_seen": 327680000 }, { "epoch": 0.1, "learning_rate": 0.0009098058096613706, "loss": 0.095, "theoretical_loss": 4.132476757595925, "tokens_seen": 327680000 }, { "epoch": 0.1, "learning_rate": 0.0009097255657197882, "loss": 0.0945, "theoretical_loss": 4.132097246516788, "tokens_seen": 327942144 }, { "epoch": 0.1, "learning_rate": 0.0009096453217782058, "loss": 0.094, "theoretical_loss": 4.131718123547829, "tokens_seen": 328204288 }, { "epoch": 0.1, "learning_rate": 0.0009095650778366233, "loss": 0.0928, "theoretical_loss": 4.131339387982628, "tokens_seen": 328466432 }, { "epoch": 0.1, "learning_rate": 0.000909484833895041, "loss": 0.091, "theoretical_loss": 4.1309610391166105, "tokens_seen": 328728576 }, { "epoch": 0.1, "learning_rate": 0.0009094045899534585, "loss": 0.09, "theoretical_loss": 4.1305830762470475, "tokens_seen": 328990720 }, { "epoch": 0.1, "learning_rate": 0.000909324346011876, "loss": 0.0925, "theoretical_loss": 4.1302054986730425, "tokens_seen": 329252864 }, { "epoch": 0.1, "learning_rate": 0.0009092441020702938, "loss": 0.0918, "theoretical_loss": 4.129828305695531, "tokens_seen": 329515008 }, { "epoch": 0.1, "learning_rate": 0.0009091638581287113, "loss": 0.0915, "theoretical_loss": 4.129451496617269, "tokens_seen": 329777152 }, { "epoch": 0.1, "learning_rate": 0.0009090836141871289, "loss": 0.0936, "theoretical_loss": 4.129075070742831, "tokens_seen": 330039296 }, { "epoch": 0.1, "learning_rate": 0.0009090033702455465, "loss": 0.0929, "theoretical_loss": 4.128699027378604, "tokens_seen": 330301440 }, { "epoch": 0.1, "learning_rate": 0.0009089231263039641, "loss": 0.0921, "theoretical_loss": 4.128323365832777, "tokens_seen": 330563584 }, { "epoch": 0.1, "learning_rate": 0.0009088428823623817, "loss": 0.0936, "theoretical_loss": 4.127948085415338, "tokens_seen": 330825728 }, { "epoch": 0.1, "learning_rate": 0.0009087626384207992, "loss": 0.0937, "theoretical_loss": 4.127573185438068, "tokens_seen": 331087872 }, { "epoch": 0.1, "learning_rate": 0.0009086823944792168, "loss": 0.0959, "theoretical_loss": 4.127198665214536, "tokens_seen": 331350016 }, { "epoch": 0.1, "learning_rate": 0.0009086021505376344, "loss": 0.0897, "theoretical_loss": 4.126824524060088, "tokens_seen": 331612160 }, { "epoch": 0.1, "learning_rate": 0.000908521906596052, "loss": 0.0925, "theoretical_loss": 4.126450761291847, "tokens_seen": 331874304 }, { "epoch": 0.1, "learning_rate": 0.0009084416626544696, "loss": 0.0948, "theoretical_loss": 4.126077376228702, "tokens_seen": 332136448 }, { "epoch": 0.1, "learning_rate": 0.0009083614187128873, "loss": 0.0874, "theoretical_loss": 4.1257043681913075, "tokens_seen": 332398592 }, { "epoch": 0.1, "learning_rate": 0.0009082811747713048, "loss": 0.0917, "theoretical_loss": 4.125331736502073, "tokens_seen": 332660736 }, { "epoch": 0.1, "learning_rate": 0.0009082009308297223, "loss": 0.0937, "theoretical_loss": 4.124959480485156, "tokens_seen": 332922880 }, { "epoch": 0.1, "learning_rate": 0.00090812068688814, "loss": 0.0899, "theoretical_loss": 4.124587599466462, "tokens_seen": 333185024 }, { "epoch": 0.1, "learning_rate": 0.0009080404429465575, "loss": 0.0936, "theoretical_loss": 4.124216092773635, "tokens_seen": 333447168 }, { "epoch": 0.1, "learning_rate": 0.0009079601990049751, "loss": 0.0895, "theoretical_loss": 4.123844959736049, "tokens_seen": 333709312 }, { "epoch": 0.1, "learning_rate": 0.0009078799550633927, "loss": 0.091, "theoretical_loss": 4.123474199684807, "tokens_seen": 333971456 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.002065310487523675, "objective/train/docs_used": 128373, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.9022668600082397, "objective/train/original_loss": 1.9022668600082397, "objective/train/theoretical_loss": 4.123103811952736, "objective/train/tokens_used": 354693600, "objective/train/value_avg": -0.00934600830078125, "objective/train/value_loss": 0.00017411113367415965, "objective/train/value_max": -0.000469207763671875, "objective/train/value_min": -0.1983642578125, "objective/train/value_reward_corr": 0.5511531255621919, "objective/train/value_std": 0.00977325439453125, "objective/train/weight_avg": 1.0021498203277588, "objective/train/weighted_lm_loss": 1.9055767059326172, "objective/train/weights_max": 1.1557458639144897, "objective/train/weights_min": 0.36877235770225525, "theoretical_loss": 4.123103811952736, "tokens_seen": 334233600 }, { "epoch": 0.1, "learning_rate": 0.0009077997111218103, "loss": 0.0956, "theoretical_loss": 4.123103811952736, "tokens_seen": 334233600 }, { "epoch": 0.1, "learning_rate": 0.000907719467180228, "loss": 0.089, "theoretical_loss": 4.122733795874372, "tokens_seen": 334495744 }, { "epoch": 0.1, "learning_rate": 0.0009076392232386455, "loss": 0.0941, "theoretical_loss": 4.122364150785966, "tokens_seen": 334757888 }, { "epoch": 0.1, "learning_rate": 0.0009075589792970631, "loss": 0.0942, "theoretical_loss": 4.1219948760254725, "tokens_seen": 335020032 }, { "epoch": 0.1, "learning_rate": 0.0009074787353554807, "loss": 0.093, "theoretical_loss": 4.121625970932542, "tokens_seen": 335282176 }, { "epoch": 0.1, "learning_rate": 0.0009073984914138983, "loss": 0.0926, "theoretical_loss": 4.121257434848519, "tokens_seen": 335544320 }, { "epoch": 0.1, "learning_rate": 0.0009073182474723158, "loss": 0.0921, "theoretical_loss": 4.120889267116435, "tokens_seen": 335806464 }, { "epoch": 0.1, "learning_rate": 0.0009072380035307335, "loss": 0.0923, "theoretical_loss": 4.1205214670810015, "tokens_seen": 336068608 }, { "epoch": 0.1, "learning_rate": 0.000907157759589151, "loss": 0.0923, "theoretical_loss": 4.120154034088609, "tokens_seen": 336330752 }, { "epoch": 0.1, "learning_rate": 0.0009070775156475685, "loss": 0.0922, "theoretical_loss": 4.119786967487314, "tokens_seen": 336592896 }, { "epoch": 0.1, "learning_rate": 0.0009069972717059863, "loss": 0.0935, "theoretical_loss": 4.11942026662684, "tokens_seen": 336855040 }, { "epoch": 0.1, "learning_rate": 0.0009069170277644038, "loss": 0.0896, "theoretical_loss": 4.11905393085857, "tokens_seen": 337117184 }, { "epoch": 0.1, "learning_rate": 0.0009068367838228214, "loss": 0.0926, "theoretical_loss": 4.118687959535539, "tokens_seen": 337379328 }, { "epoch": 0.1, "learning_rate": 0.000906756539881239, "loss": 0.0906, "theoretical_loss": 4.118322352012429, "tokens_seen": 337641472 }, { "epoch": 0.1, "learning_rate": 0.0009066762959396566, "loss": 0.0902, "theoretical_loss": 4.117957107645569, "tokens_seen": 337903616 }, { "epoch": 0.1, "learning_rate": 0.0009065960519980741, "loss": 0.0925, "theoretical_loss": 4.1175922257929205, "tokens_seen": 338165760 }, { "epoch": 0.1, "learning_rate": 0.0009065158080564918, "loss": 0.0945, "theoretical_loss": 4.117227705814078, "tokens_seen": 338427904 }, { "epoch": 0.1, "learning_rate": 0.0009064355641149093, "loss": 0.0971, "theoretical_loss": 4.116863547070264, "tokens_seen": 338690048 }, { "epoch": 0.1, "learning_rate": 0.0009063553201733269, "loss": 0.0927, "theoretical_loss": 4.116499748924319, "tokens_seen": 338952192 }, { "epoch": 0.1, "learning_rate": 0.0009062750762317446, "loss": 0.093, "theoretical_loss": 4.116136310740702, "tokens_seen": 339214336 }, { "epoch": 0.1, "learning_rate": 0.0009061948322901621, "loss": 0.0871, "theoretical_loss": 4.115773231885479, "tokens_seen": 339476480 }, { "epoch": 0.1, "learning_rate": 0.0009061145883485798, "loss": 0.0904, "theoretical_loss": 4.115410511726323, "tokens_seen": 339738624 }, { "epoch": 0.1, "learning_rate": 0.0009060343444069973, "loss": 0.0921, "theoretical_loss": 4.115048149632507, "tokens_seen": 340000768 }, { "epoch": 0.1, "learning_rate": 0.0009059541004654149, "loss": 0.0933, "theoretical_loss": 4.114686144974897, "tokens_seen": 340262912 }, { "epoch": 0.1, "learning_rate": 0.0009058738565238325, "loss": 0.0923, "theoretical_loss": 4.114324497125947, "tokens_seen": 340525056 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.0013270352501422167, "objective/train/docs_used": 130755, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.773648977279663, "objective/train/original_loss": 1.7736492156982422, "objective/train/theoretical_loss": 4.113963205459697, "objective/train/tokens_used": 361247200, "objective/train/value_avg": -0.0078582763671875, "objective/train/value_loss": 0.0002814758918248117, "objective/train/value_max": -0.0004305839538574219, "objective/train/value_min": -0.75634765625, "objective/train/value_reward_corr": 0.6420233629882061, "objective/train/value_std": 0.01309967041015625, "objective/train/weight_avg": 1.0014532804489136, "objective/train/weighted_lm_loss": 1.775871753692627, "objective/train/weights_max": 1.4132821559906006, "objective/train/weights_min": 0.38253408670425415, "theoretical_loss": 4.113963205459697, "tokens_seen": 340787200 }, { "epoch": 0.1, "learning_rate": 0.00090579361258225, "loss": 0.0906, "theoretical_loss": 4.113963205459697, "tokens_seen": 340787200 }, { "epoch": 0.1, "learning_rate": 0.0009057133686406676, "loss": 0.091, "theoretical_loss": 4.113602269351765, "tokens_seen": 341049344 }, { "epoch": 0.1, "learning_rate": 0.0009056331246990852, "loss": 0.0903, "theoretical_loss": 4.113241688179341, "tokens_seen": 341311488 }, { "epoch": 0.1, "learning_rate": 0.0009055528807575029, "loss": 0.093, "theoretical_loss": 4.1128814613211855, "tokens_seen": 341573632 }, { "epoch": 0.1, "learning_rate": 0.0009054726368159204, "loss": 0.0896, "theoretical_loss": 4.1125215881576205, "tokens_seen": 341835776 }, { "epoch": 0.1, "learning_rate": 0.0009053923928743381, "loss": 0.0899, "theoretical_loss": 4.112162068070525, "tokens_seen": 342097920 }, { "epoch": 0.1, "learning_rate": 0.0009053121489327556, "loss": 0.0911, "theoretical_loss": 4.111802900443333, "tokens_seen": 342360064 }, { "epoch": 0.1, "learning_rate": 0.0009052319049911731, "loss": 0.0891, "theoretical_loss": 4.111444084661026, "tokens_seen": 342622208 }, { "epoch": 0.1, "learning_rate": 0.0009051516610495908, "loss": 0.0888, "theoretical_loss": 4.111085620110127, "tokens_seen": 342884352 }, { "epoch": 0.1, "learning_rate": 0.0009050714171080083, "loss": 0.0914, "theoretical_loss": 4.110727506178697, "tokens_seen": 343146496 }, { "epoch": 0.1, "learning_rate": 0.000904991173166426, "loss": 0.0929, "theoretical_loss": 4.110369742256329, "tokens_seen": 343408640 }, { "epoch": 0.1, "learning_rate": 0.0009049109292248435, "loss": 0.09, "theoretical_loss": 4.110012327734145, "tokens_seen": 343670784 }, { "epoch": 0.1, "learning_rate": 0.0009048306852832611, "loss": 0.091, "theoretical_loss": 4.1096552620047895, "tokens_seen": 343932928 }, { "epoch": 0.1, "learning_rate": 0.0009047504413416788, "loss": 0.0921, "theoretical_loss": 4.109298544462423, "tokens_seen": 344195072 }, { "epoch": 0.1, "learning_rate": 0.0009046701974000963, "loss": 0.0867, "theoretical_loss": 4.108942174502721, "tokens_seen": 344457216 }, { "epoch": 0.1, "learning_rate": 0.0009045899534585139, "loss": 0.0907, "theoretical_loss": 4.108586151522863, "tokens_seen": 344719360 }, { "epoch": 0.1, "learning_rate": 0.0009045097095169315, "loss": 0.0932, "theoretical_loss": 4.1082304749215375, "tokens_seen": 344981504 }, { "epoch": 0.1, "learning_rate": 0.0009044294655753491, "loss": 0.091, "theoretical_loss": 4.107875144098925, "tokens_seen": 345243648 }, { "epoch": 0.1, "learning_rate": 0.0009043492216337666, "loss": 0.0949, "theoretical_loss": 4.107520158456703, "tokens_seen": 345505792 }, { "epoch": 0.1, "learning_rate": 0.0009042689776921843, "loss": 0.0901, "theoretical_loss": 4.107165517398034, "tokens_seen": 345767936 }, { "epoch": 0.1, "learning_rate": 0.0009041887337506018, "loss": 0.0926, "theoretical_loss": 4.106811220327568, "tokens_seen": 346030080 }, { "epoch": 0.1, "learning_rate": 0.0009041084898090193, "loss": 0.0906, "theoretical_loss": 4.10645726665143, "tokens_seen": 346292224 }, { "epoch": 0.11, "learning_rate": 0.0009040282458674371, "loss": 0.0934, "theoretical_loss": 4.10610365577722, "tokens_seen": 346554368 }, { "epoch": 0.11, "learning_rate": 0.0009039480019258546, "loss": 0.0913, "theoretical_loss": 4.105750387114009, "tokens_seen": 346816512 }, { "epoch": 0.11, "learning_rate": 0.0009038677579842723, "loss": 0.0905, "theoretical_loss": 4.105397460072329, "tokens_seen": 347078656 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.002003602683544159, "objective/train/docs_used": 133174, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.733391523361206, "objective/train/original_loss": 1.7333914041519165, "objective/train/theoretical_loss": 4.105044874064174, "objective/train/tokens_used": 367800800, "objective/train/value_avg": -0.0088043212890625, "objective/train/value_loss": 0.00023858436907175928, "objective/train/value_max": -0.0003998279571533203, "objective/train/value_min": -0.9033203125, "objective/train/value_reward_corr": 0.6106213393331679, "objective/train/value_std": 0.0132904052734375, "objective/train/weight_avg": 1.002118468284607, "objective/train/weighted_lm_loss": 1.736554741859436, "objective/train/weights_max": 2.2593677043914795, "objective/train/weights_min": 0.37059226632118225, "theoretical_loss": 4.105044874064174, "tokens_seen": 347340800 }, { "epoch": 0.11, "learning_rate": 0.0009037875140426898, "loss": 0.0871, "theoretical_loss": 4.105044874064174, "tokens_seen": 347340800 }, { "epoch": 0.11, "learning_rate": 0.0009037072701011074, "loss": 0.0909, "theoretical_loss": 4.104692628502993, "tokens_seen": 347602944 }, { "epoch": 0.11, "learning_rate": 0.000903627026159525, "loss": 0.0895, "theoretical_loss": 4.104340722803683, "tokens_seen": 347865088 }, { "epoch": 0.11, "learning_rate": 0.0009035467822179425, "loss": 0.0903, "theoretical_loss": 4.103989156382589, "tokens_seen": 348127232 }, { "epoch": 0.11, "learning_rate": 0.0009034665382763601, "loss": 0.0873, "theoretical_loss": 4.103637928657495, "tokens_seen": 348389376 }, { "epoch": 0.11, "learning_rate": 0.0009033862943347777, "loss": 0.0953, "theoretical_loss": 4.103287039047622, "tokens_seen": 348651520 }, { "epoch": 0.11, "learning_rate": 0.0009033060503931954, "loss": 0.0901, "theoretical_loss": 4.102936486973624, "tokens_seen": 348913664 }, { "epoch": 0.11, "learning_rate": 0.0009032258064516129, "loss": 0.0896, "theoretical_loss": 4.102586271857579, "tokens_seen": 349175808 }, { "epoch": 0.11, "learning_rate": 0.0009031455625100306, "loss": 0.0882, "theoretical_loss": 4.102236393122989, "tokens_seen": 349437952 }, { "epoch": 0.11, "learning_rate": 0.0009030653185684481, "loss": 0.0915, "theoretical_loss": 4.101886850194775, "tokens_seen": 349700096 }, { "epoch": 0.11, "learning_rate": 0.0009029850746268657, "loss": 0.0882, "theoretical_loss": 4.10153764249927, "tokens_seen": 349962240 }, { "epoch": 0.11, "learning_rate": 0.0009029048306852833, "loss": 0.0884, "theoretical_loss": 4.1011887694642155, "tokens_seen": 350224384 }, { "epoch": 0.11, "learning_rate": 0.0009028245867437008, "loss": 0.0897, "theoretical_loss": 4.100840230518759, "tokens_seen": 350486528 }, { "epoch": 0.11, "learning_rate": 0.0009027443428021184, "loss": 0.0918, "theoretical_loss": 4.100492025093445, "tokens_seen": 350748672 }, { "epoch": 0.11, "learning_rate": 0.000902664098860536, "loss": 0.0919, "theoretical_loss": 4.100144152620215, "tokens_seen": 351010816 }, { "epoch": 0.11, "learning_rate": 0.0009025838549189537, "loss": 0.0904, "theoretical_loss": 4.099796612532403, "tokens_seen": 351272960 }, { "epoch": 0.11, "learning_rate": 0.0009025036109773713, "loss": 0.0898, "theoretical_loss": 4.0994494042647265, "tokens_seen": 351535104 }, { "epoch": 0.11, "learning_rate": 0.0009024233670357889, "loss": 0.0904, "theoretical_loss": 4.099102527253285, "tokens_seen": 351797248 }, { "epoch": 0.11, "learning_rate": 0.0009023431230942064, "loss": 0.0923, "theoretical_loss": 4.098755980935557, "tokens_seen": 352059392 }, { "epoch": 0.11, "learning_rate": 0.000902262879152624, "loss": 0.0893, "theoretical_loss": 4.0984097647503965, "tokens_seen": 352321536 }, { "epoch": 0.11, "learning_rate": 0.0009021826352110416, "loss": 0.0903, "theoretical_loss": 4.09806387813802, "tokens_seen": 352583680 }, { "epoch": 0.11, "learning_rate": 0.0009021023912694591, "loss": 0.0875, "theoretical_loss": 4.0977183205400145, "tokens_seen": 352845824 }, { "epoch": 0.11, "learning_rate": 0.0009020221473278768, "loss": 0.0897, "theoretical_loss": 4.097373091399324, "tokens_seen": 353107968 }, { "epoch": 0.11, "learning_rate": 0.0009019419033862943, "loss": 0.0896, "theoretical_loss": 4.097028190160249, "tokens_seen": 353370112 }, { "epoch": 0.11, "learning_rate": 0.000901861659444712, "loss": 0.0909, "theoretical_loss": 4.096683616268442, "tokens_seen": 353632256 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.003045022254809737, "objective/train/docs_used": 135614, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.740461826324463, "objective/train/original_loss": 1.7404619455337524, "objective/train/theoretical_loss": 4.096339369170902, "objective/train/tokens_used": 374354400, "objective/train/value_avg": -0.01047515869140625, "objective/train/value_loss": 0.0004944366519339383, "objective/train/value_max": -0.00038909912109375, "objective/train/value_min": -0.673828125, "objective/train/value_reward_corr": 0.5444227902078449, "objective/train/value_std": 0.01522064208984375, "objective/train/weight_avg": 1.00326406955719, "objective/train/weighted_lm_loss": 1.7446165084838867, "objective/train/weights_max": 1.2945656776428223, "objective/train/weights_min": 0.3746718466281891, "theoretical_loss": 4.096339369170902, "tokens_seen": 353894400 }, { "epoch": 0.11, "learning_rate": 0.0009017814155031296, "loss": 0.0898, "theoretical_loss": 4.096339369170902, "tokens_seen": 353894400 }, { "epoch": 0.11, "learning_rate": 0.0009017011715615471, "loss": 0.0909, "theoretical_loss": 4.095995448315972, "tokens_seen": 354156544 }, { "epoch": 0.11, "learning_rate": 0.0009016209276199647, "loss": 0.0899, "theoretical_loss": 4.095651853153331, "tokens_seen": 354418688 }, { "epoch": 0.11, "learning_rate": 0.0009015406836783823, "loss": 0.0912, "theoretical_loss": 4.095308583133997, "tokens_seen": 354680832 }, { "epoch": 0.11, "learning_rate": 0.0009014604397367999, "loss": 0.0915, "theoretical_loss": 4.094965637710314, "tokens_seen": 354942976 }, { "epoch": 0.11, "learning_rate": 0.0009013801957952175, "loss": 0.0903, "theoretical_loss": 4.094623016335954, "tokens_seen": 355205120 }, { "epoch": 0.11, "learning_rate": 0.0009012999518536351, "loss": 0.0899, "theoretical_loss": 4.094280718465911, "tokens_seen": 355467264 }, { "epoch": 0.11, "learning_rate": 0.0009012197079120526, "loss": 0.09, "theoretical_loss": 4.093938743556496, "tokens_seen": 355729408 }, { "epoch": 0.11, "learning_rate": 0.0009011394639704702, "loss": 0.0889, "theoretical_loss": 4.093597091065333, "tokens_seen": 355991552 }, { "epoch": 0.11, "learning_rate": 0.0009010592200288879, "loss": 0.089, "theoretical_loss": 4.093255760451357, "tokens_seen": 356253696 }, { "epoch": 0.11, "learning_rate": 0.0009009789760873054, "loss": 0.0883, "theoretical_loss": 4.092914751174808, "tokens_seen": 356515840 }, { "epoch": 0.11, "learning_rate": 0.0009008987321457231, "loss": 0.0898, "theoretical_loss": 4.092574062697225, "tokens_seen": 356777984 }, { "epoch": 0.11, "learning_rate": 0.0009008184882041406, "loss": 0.0881, "theoretical_loss": 4.092233694481447, "tokens_seen": 357040128 }, { "epoch": 0.11, "learning_rate": 0.0009007382442625582, "loss": 0.0927, "theoretical_loss": 4.091893645991604, "tokens_seen": 357302272 }, { "epoch": 0.11, "learning_rate": 0.0009006580003209758, "loss": 0.0913, "theoretical_loss": 4.091553916693115, "tokens_seen": 357564416 }, { "epoch": 0.11, "learning_rate": 0.0009005777563793933, "loss": 0.0859, "theoretical_loss": 4.091214506052687, "tokens_seen": 357826560 }, { "epoch": 0.11, "learning_rate": 0.0009004975124378109, "loss": 0.0841, "theoretical_loss": 4.090875413538302, "tokens_seen": 358088704 }, { "epoch": 0.11, "learning_rate": 0.0009004172684962285, "loss": 0.0921, "theoretical_loss": 4.090536638619224, "tokens_seen": 358350848 }, { "epoch": 0.11, "learning_rate": 0.0009003370245546462, "loss": 0.0872, "theoretical_loss": 4.090198180765989, "tokens_seen": 358612992 }, { "epoch": 0.11, "learning_rate": 0.0009002567806130637, "loss": 0.0893, "theoretical_loss": 4.0898600394504, "tokens_seen": 358875136 }, { "epoch": 0.11, "learning_rate": 0.0009001765366714814, "loss": 0.0888, "theoretical_loss": 4.089522214145525, "tokens_seen": 359137280 }, { "epoch": 0.11, "learning_rate": 0.0009000962927298989, "loss": 0.0911, "theoretical_loss": 4.089184704325695, "tokens_seen": 359399424 }, { "epoch": 0.11, "learning_rate": 0.0009000160487883166, "loss": 0.0886, "theoretical_loss": 4.088847509466497, "tokens_seen": 359661568 }, { "epoch": 0.11, "learning_rate": 0.0008999358048467341, "loss": 0.0885, "theoretical_loss": 4.088510629044771, "tokens_seen": 359923712 }, { "epoch": 0.11, "learning_rate": 0.0008998555609051516, "loss": 0.0885, "theoretical_loss": 4.088174062538605, "tokens_seen": 360185856 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0014361967332661152, "objective/train/docs_used": 137426, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.912741780281067, "objective/train/original_loss": 1.9127418994903564, "objective/train/theoretical_loss": 4.087837809427334, "objective/train/tokens_used": 380908000, "objective/train/value_avg": -0.0081939697265625, "objective/train/value_loss": 0.00017770560225471854, "objective/train/value_max": -0.0002913475036621094, "objective/train/value_min": -0.2374267578125, "objective/train/value_reward_corr": 0.6243051078626306, "objective/train/value_std": 0.0115814208984375, "objective/train/weight_avg": 1.0015201568603516, "objective/train/weighted_lm_loss": 1.9144210815429688, "objective/train/weights_max": 1.1172159910202026, "objective/train/weights_min": 0.36891940236091614, "theoretical_loss": 4.087837809427334, "tokens_seen": 360448000 }, { "epoch": 0.11, "learning_rate": 0.0008997753169635693, "loss": 0.093, "theoretical_loss": 4.087837809427334, "tokens_seen": 360448000 }, { "epoch": 0.11, "learning_rate": 0.0008996950730219868, "loss": 0.088, "theoretical_loss": 4.087501869191536, "tokens_seen": 360710144 }, { "epoch": 0.11, "learning_rate": 0.0008996148290804045, "loss": 0.0877, "theoretical_loss": 4.087166241313023, "tokens_seen": 360972288 }, { "epoch": 0.11, "learning_rate": 0.0008995345851388221, "loss": 0.0925, "theoretical_loss": 4.086830925274842, "tokens_seen": 361234432 }, { "epoch": 0.11, "learning_rate": 0.0008994543411972397, "loss": 0.0887, "theoretical_loss": 4.08649592056127, "tokens_seen": 361496576 }, { "epoch": 0.11, "learning_rate": 0.0008993740972556572, "loss": 0.0926, "theoretical_loss": 4.086161226657811, "tokens_seen": 361758720 }, { "epoch": 0.11, "learning_rate": 0.0008992938533140748, "loss": 0.0857, "theoretical_loss": 4.085826843051191, "tokens_seen": 362020864 }, { "epoch": 0.11, "learning_rate": 0.0008992136093724924, "loss": 0.0915, "theoretical_loss": 4.0854927692293534, "tokens_seen": 362283008 }, { "epoch": 0.11, "learning_rate": 0.0008991333654309099, "loss": 0.0891, "theoretical_loss": 4.085159004681458, "tokens_seen": 362545152 }, { "epoch": 0.11, "learning_rate": 0.0008990531214893276, "loss": 0.0904, "theoretical_loss": 4.084825548897873, "tokens_seen": 362807296 }, { "epoch": 0.11, "learning_rate": 0.0008989728775477451, "loss": 0.0932, "theoretical_loss": 4.084492401370177, "tokens_seen": 363069440 }, { "epoch": 0.11, "learning_rate": 0.0008988926336061629, "loss": 0.0911, "theoretical_loss": 4.08415956159115, "tokens_seen": 363331584 }, { "epoch": 0.11, "learning_rate": 0.0008988123896645804, "loss": 0.0894, "theoretical_loss": 4.083827029054773, "tokens_seen": 363593728 }, { "epoch": 0.11, "learning_rate": 0.0008987321457229979, "loss": 0.0912, "theoretical_loss": 4.0834948032562215, "tokens_seen": 363855872 }, { "epoch": 0.11, "learning_rate": 0.0008986519017814156, "loss": 0.0909, "theoretical_loss": 4.083162883691864, "tokens_seen": 364118016 }, { "epoch": 0.11, "learning_rate": 0.0008985716578398331, "loss": 0.0913, "theoretical_loss": 4.082831269859261, "tokens_seen": 364380160 }, { "epoch": 0.11, "learning_rate": 0.0008984914138982507, "loss": 0.0901, "theoretical_loss": 4.0824999612571515, "tokens_seen": 364642304 }, { "epoch": 0.11, "learning_rate": 0.0008984111699566683, "loss": 0.0889, "theoretical_loss": 4.082168957385462, "tokens_seen": 364904448 }, { "epoch": 0.11, "learning_rate": 0.0008983309260150859, "loss": 0.0917, "theoretical_loss": 4.081838257745293, "tokens_seen": 365166592 }, { "epoch": 0.11, "learning_rate": 0.0008982506820735034, "loss": 0.0873, "theoretical_loss": 4.081507861838922, "tokens_seen": 365428736 }, { "epoch": 0.11, "learning_rate": 0.000898170438131921, "loss": 0.0901, "theoretical_loss": 4.081177769169795, "tokens_seen": 365690880 }, { "epoch": 0.11, "learning_rate": 0.0008980901941903387, "loss": 0.0872, "theoretical_loss": 4.080847979242526, "tokens_seen": 365953024 }, { "epoch": 0.11, "learning_rate": 0.0008980099502487562, "loss": 0.0898, "theoretical_loss": 4.0805184915628905, "tokens_seen": 366215168 }, { "epoch": 0.11, "learning_rate": 0.0008979297063071739, "loss": 0.0891, "theoretical_loss": 4.080189305637827, "tokens_seen": 366477312 }, { "epoch": 0.11, "learning_rate": 0.0008978494623655914, "loss": 0.0893, "theoretical_loss": 4.079860420975429, "tokens_seen": 366739456 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0024263346567749977, "objective/train/docs_used": 139849, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8241596221923828, "objective/train/original_loss": 1.8241596221923828, "objective/train/theoretical_loss": 4.07953183708494, "objective/train/tokens_used": 387461600, "objective/train/value_avg": -0.0073089599609375, "objective/train/value_loss": 0.00016963874804787338, "objective/train/value_max": -0.00034999847412109375, "objective/train/value_min": -0.385498046875, "objective/train/value_reward_corr": 0.4735220244507201, "objective/train/value_std": 0.008209228515625, "objective/train/weight_avg": 1.002504587173462, "objective/train/weighted_lm_loss": 1.8287756443023682, "objective/train/weights_max": 1.386350154876709, "objective/train/weights_min": 0.3713734447956085, "theoretical_loss": 4.07953183708494, "tokens_seen": 367001600 }, { "epoch": 0.11, "learning_rate": 0.000897769218424009, "loss": 0.0945, "theoretical_loss": 4.07953183708494, "tokens_seen": 367001600 }, { "epoch": 0.11, "learning_rate": 0.0008976889744824266, "loss": 0.0911, "theoretical_loss": 4.079203553476759, "tokens_seen": 367263744 }, { "epoch": 0.11, "learning_rate": 0.0008976087305408441, "loss": 0.0914, "theoretical_loss": 4.078875569662424, "tokens_seen": 367525888 }, { "epoch": 0.11, "learning_rate": 0.0008975284865992618, "loss": 0.0883, "theoretical_loss": 4.07854788515462, "tokens_seen": 367788032 }, { "epoch": 0.11, "learning_rate": 0.0008974482426576793, "loss": 0.0893, "theoretical_loss": 4.078220499467168, "tokens_seen": 368050176 }, { "epoch": 0.11, "learning_rate": 0.000897367998716097, "loss": 0.0896, "theoretical_loss": 4.077893412115025, "tokens_seen": 368312320 }, { "epoch": 0.11, "learning_rate": 0.0008972877547745146, "loss": 0.0872, "theoretical_loss": 4.077566622614281, "tokens_seen": 368574464 }, { "epoch": 0.11, "learning_rate": 0.0008972075108329322, "loss": 0.089, "theoretical_loss": 4.077240130482153, "tokens_seen": 368836608 }, { "epoch": 0.11, "learning_rate": 0.0008971272668913497, "loss": 0.0912, "theoretical_loss": 4.076913935236982, "tokens_seen": 369098752 }, { "epoch": 0.11, "learning_rate": 0.0008970470229497674, "loss": 0.089, "theoretical_loss": 4.076588036398235, "tokens_seen": 369360896 }, { "epoch": 0.11, "learning_rate": 0.0008969667790081849, "loss": 0.0887, "theoretical_loss": 4.076262433486491, "tokens_seen": 369623040 }, { "epoch": 0.11, "learning_rate": 0.0008968865350666024, "loss": 0.0909, "theoretical_loss": 4.075937126023448, "tokens_seen": 369885184 }, { "epoch": 0.11, "learning_rate": 0.0008968062911250201, "loss": 0.0886, "theoretical_loss": 4.075612113531915, "tokens_seen": 370147328 }, { "epoch": 0.11, "learning_rate": 0.0008967260471834376, "loss": 0.0882, "theoretical_loss": 4.075287395535807, "tokens_seen": 370409472 }, { "epoch": 0.11, "learning_rate": 0.0008966458032418553, "loss": 0.0894, "theoretical_loss": 4.074962971560145, "tokens_seen": 370671616 }, { "epoch": 0.11, "learning_rate": 0.0008965655593002729, "loss": 0.0882, "theoretical_loss": 4.074638841131052, "tokens_seen": 370933760 }, { "epoch": 0.11, "learning_rate": 0.0008964853153586905, "loss": 0.09, "theoretical_loss": 4.074315003775746, "tokens_seen": 371195904 }, { "epoch": 0.11, "learning_rate": 0.000896405071417108, "loss": 0.0913, "theoretical_loss": 4.073991459022544, "tokens_seen": 371458048 }, { "epoch": 0.11, "learning_rate": 0.0008963248274755256, "loss": 0.0868, "theoretical_loss": 4.073668206400851, "tokens_seen": 371720192 }, { "epoch": 0.11, "learning_rate": 0.0008962445835339432, "loss": 0.0952, "theoretical_loss": 4.0733452454411605, "tokens_seen": 371982336 }, { "epoch": 0.11, "learning_rate": 0.0008961643395923608, "loss": 0.0901, "theoretical_loss": 4.0730225756750515, "tokens_seen": 372244480 }, { "epoch": 0.11, "learning_rate": 0.0008960840956507784, "loss": 0.0874, "theoretical_loss": 4.072700196635185, "tokens_seen": 372506624 }, { "epoch": 0.11, "learning_rate": 0.0008960038517091959, "loss": 0.0904, "theoretical_loss": 4.072378107855299, "tokens_seen": 372768768 }, { "epoch": 0.11, "learning_rate": 0.0008959236077676137, "loss": 0.0864, "theoretical_loss": 4.0720563088702075, "tokens_seen": 373030912 }, { "epoch": 0.11, "learning_rate": 0.0008958433638260312, "loss": 0.0887, "theoretical_loss": 4.071734799215796, "tokens_seen": 373293056 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0020079510286450386, "objective/train/docs_used": 142333, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8369685411453247, "objective/train/original_loss": 1.8369685411453247, "objective/train/theoretical_loss": 4.071413578429017, "objective/train/tokens_used": 394015200, "objective/train/value_avg": -0.01097869873046875, "objective/train/value_loss": 0.000343000196153298, "objective/train/value_max": -0.00045299530029296875, "objective/train/value_min": -0.6923828125, "objective/train/value_reward_corr": 0.672175762308715, "objective/train/value_std": 0.0175018310546875, "objective/train/weight_avg": 1.0021660327911377, "objective/train/weighted_lm_loss": 1.8394285440444946, "objective/train/weights_max": 1.6616524457931519, "objective/train/weights_min": 0.37235790491104126, "theoretical_loss": 4.071413578429017, "tokens_seen": 373555200 }, { "epoch": 0.11, "learning_rate": 0.0008957631198844487, "loss": 0.0874, "theoretical_loss": 4.071413578429017, "tokens_seen": 373555200 }, { "epoch": 0.11, "learning_rate": 0.0008956828759428664, "loss": 0.0895, "theoretical_loss": 4.071092646047892, "tokens_seen": 373817344 }, { "epoch": 0.11, "learning_rate": 0.0008956026320012839, "loss": 0.088, "theoretical_loss": 4.0707720016115, "tokens_seen": 374079488 }, { "epoch": 0.11, "learning_rate": 0.0008955223880597015, "loss": 0.0872, "theoretical_loss": 4.0704516446599825, "tokens_seen": 374341632 }, { "epoch": 0.11, "learning_rate": 0.0008954421441181191, "loss": 0.0915, "theoretical_loss": 4.070131574734536, "tokens_seen": 374603776 }, { "epoch": 0.11, "learning_rate": 0.0008953619001765367, "loss": 0.0894, "theoretical_loss": 4.069811791377409, "tokens_seen": 374865920 }, { "epoch": 0.11, "learning_rate": 0.0008952816562349542, "loss": 0.0883, "theoretical_loss": 4.0694922941319, "tokens_seen": 375128064 }, { "epoch": 0.11, "learning_rate": 0.0008952014122933718, "loss": 0.091, "theoretical_loss": 4.069173082542351, "tokens_seen": 375390208 }, { "epoch": 0.11, "learning_rate": 0.0008951211683517895, "loss": 0.0935, "theoretical_loss": 4.068854156154154, "tokens_seen": 375652352 }, { "epoch": 0.11, "learning_rate": 0.000895040924410207, "loss": 0.0902, "theoretical_loss": 4.068535514513734, "tokens_seen": 375914496 }, { "epoch": 0.11, "learning_rate": 0.0008949606804686247, "loss": 0.0913, "theoretical_loss": 4.068217157168556, "tokens_seen": 376176640 }, { "epoch": 0.11, "learning_rate": 0.0008948804365270422, "loss": 0.0892, "theoretical_loss": 4.06789908366712, "tokens_seen": 376438784 }, { "epoch": 0.11, "learning_rate": 0.0008948001925854599, "loss": 0.0916, "theoretical_loss": 4.067581293558955, "tokens_seen": 376700928 }, { "epoch": 0.11, "learning_rate": 0.0008947199486438774, "loss": 0.0913, "theoretical_loss": 4.0672637863946175, "tokens_seen": 376963072 }, { "epoch": 0.11, "learning_rate": 0.0008946397047022949, "loss": 0.0882, "theoretical_loss": 4.0669465617256915, "tokens_seen": 377225216 }, { "epoch": 0.11, "learning_rate": 0.0008945594607607126, "loss": 0.0874, "theoretical_loss": 4.06662961910478, "tokens_seen": 377487360 }, { "epoch": 0.11, "learning_rate": 0.0008944792168191301, "loss": 0.0868, "theoretical_loss": 4.066312958085503, "tokens_seen": 377749504 }, { "epoch": 0.11, "learning_rate": 0.0008943989728775478, "loss": 0.0875, "theoretical_loss": 4.065996578222502, "tokens_seen": 378011648 }, { "epoch": 0.11, "learning_rate": 0.0008943187289359654, "loss": 0.0902, "theoretical_loss": 4.065680479071426, "tokens_seen": 378273792 }, { "epoch": 0.11, "learning_rate": 0.000894238484994383, "loss": 0.0869, "theoretical_loss": 4.0653646601889335, "tokens_seen": 378535936 }, { "epoch": 0.11, "learning_rate": 0.0008941582410528005, "loss": 0.0885, "theoretical_loss": 4.065049121132693, "tokens_seen": 378798080 }, { "epoch": 0.11, "learning_rate": 0.0008940779971112181, "loss": 0.0877, "theoretical_loss": 4.0647338614613755, "tokens_seen": 379060224 }, { "epoch": 0.11, "learning_rate": 0.0008939977531696357, "loss": 0.0859, "theoretical_loss": 4.0644188807346495, "tokens_seen": 379322368 }, { "epoch": 0.12, "learning_rate": 0.0008939175092280532, "loss": 0.0903, "theoretical_loss": 4.064104178513186, "tokens_seen": 379584512 }, { "epoch": 0.12, "learning_rate": 0.0008938372652864709, "loss": 0.0884, "theoretical_loss": 4.0637897543586465, "tokens_seen": 379846656 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0015821303240954876, "objective/train/docs_used": 144698, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6598097085952759, "objective/train/original_loss": 1.6598095893859863, "objective/train/theoretical_loss": 4.063475607833687, "objective/train/tokens_used": 400568800, "objective/train/value_avg": -0.007183074951171875, "objective/train/value_loss": 0.00021680242207366973, "objective/train/value_max": -0.0001881122589111328, "objective/train/value_min": -0.3359375, "objective/train/value_reward_corr": 0.5725408863624046, "objective/train/value_std": 0.00899505615234375, "objective/train/weight_avg": 1.0016818046569824, "objective/train/weighted_lm_loss": 1.662369728088379, "objective/train/weights_max": 1.1277165412902832, "objective/train/weights_min": 0.373598575592041, "theoretical_loss": 4.063475607833687, "tokens_seen": 380108800 }, { "epoch": 0.12, "learning_rate": 0.0008937570213448884, "loss": 0.0865, "theoretical_loss": 4.063475607833687, "tokens_seen": 380108800 }, { "epoch": 0.12, "learning_rate": 0.0008936767774033062, "loss": 0.089, "theoretical_loss": 4.063161738501951, "tokens_seen": 380370944 }, { "epoch": 0.12, "learning_rate": 0.0008935965334617237, "loss": 0.0886, "theoretical_loss": 4.0628481459280685, "tokens_seen": 380633088 }, { "epoch": 0.12, "learning_rate": 0.0008935162895201413, "loss": 0.0867, "theoretical_loss": 4.062534829677653, "tokens_seen": 380895232 }, { "epoch": 0.12, "learning_rate": 0.0008934360455785589, "loss": 0.0888, "theoretical_loss": 4.062221789317297, "tokens_seen": 381157376 }, { "epoch": 0.12, "learning_rate": 0.0008933558016369764, "loss": 0.087, "theoretical_loss": 4.061909024414572, "tokens_seen": 381419520 }, { "epoch": 0.12, "learning_rate": 0.000893275557695394, "loss": 0.0875, "theoretical_loss": 4.061596534538021, "tokens_seen": 381681664 }, { "epoch": 0.12, "learning_rate": 0.0008931953137538116, "loss": 0.0909, "theoretical_loss": 4.061284319257162, "tokens_seen": 381943808 }, { "epoch": 0.12, "learning_rate": 0.0008931150698122292, "loss": 0.0901, "theoretical_loss": 4.060972378142479, "tokens_seen": 382205952 }, { "epoch": 0.12, "learning_rate": 0.0008930348258706467, "loss": 0.0896, "theoretical_loss": 4.060660710765423, "tokens_seen": 382468096 }, { "epoch": 0.12, "learning_rate": 0.0008929545819290645, "loss": 0.0909, "theoretical_loss": 4.060349316698408, "tokens_seen": 382730240 }, { "epoch": 0.12, "learning_rate": 0.000892874337987482, "loss": 0.0874, "theoretical_loss": 4.060038195514808, "tokens_seen": 382992384 }, { "epoch": 0.12, "learning_rate": 0.0008927940940458995, "loss": 0.0866, "theoretical_loss": 4.059727346788955, "tokens_seen": 383254528 }, { "epoch": 0.12, "learning_rate": 0.0008927138501043172, "loss": 0.087, "theoretical_loss": 4.059416770096134, "tokens_seen": 383516672 }, { "epoch": 0.12, "learning_rate": 0.0008926336061627347, "loss": 0.0885, "theoretical_loss": 4.059106465012583, "tokens_seen": 383778816 }, { "epoch": 0.12, "learning_rate": 0.0008925533622211523, "loss": 0.0878, "theoretical_loss": 4.058796431115489, "tokens_seen": 384040960 }, { "epoch": 0.12, "learning_rate": 0.0008924731182795699, "loss": 0.0894, "theoretical_loss": 4.058486667982986, "tokens_seen": 384303104 }, { "epoch": 0.12, "learning_rate": 0.0008923928743379875, "loss": 0.0873, "theoretical_loss": 4.058177175194148, "tokens_seen": 384565248 }, { "epoch": 0.12, "learning_rate": 0.0008923126303964051, "loss": 0.0883, "theoretical_loss": 4.057867952328994, "tokens_seen": 384827392 }, { "epoch": 0.12, "learning_rate": 0.0008922323864548226, "loss": 0.0897, "theoretical_loss": 4.057558998968479, "tokens_seen": 385089536 }, { "epoch": 0.12, "learning_rate": 0.0008921521425132403, "loss": 0.0903, "theoretical_loss": 4.0572503146944925, "tokens_seen": 385351680 }, { "epoch": 0.12, "learning_rate": 0.0008920718985716579, "loss": 0.0884, "theoretical_loss": 4.056941899089858, "tokens_seen": 385613824 }, { "epoch": 0.12, "learning_rate": 0.0008919916546300755, "loss": 0.0881, "theoretical_loss": 4.056633751738328, "tokens_seen": 385875968 }, { "epoch": 0.12, "learning_rate": 0.000891911410688493, "loss": 0.0874, "theoretical_loss": 4.0563258722245825, "tokens_seen": 386138112 }, { "epoch": 0.12, "learning_rate": 0.0008918311667469107, "loss": 0.0868, "theoretical_loss": 4.056018260134226, "tokens_seen": 386400256 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.001992823090404272, "objective/train/docs_used": 146981, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8857860565185547, "objective/train/original_loss": 1.8857860565185547, "objective/train/theoretical_loss": 4.055710915053783, "objective/train/tokens_used": 407122400, "objective/train/value_avg": -0.007904052734375, "objective/train/value_loss": 0.00022203872504178435, "objective/train/value_max": -0.00026726722717285156, "objective/train/value_min": -0.2406005859375, "objective/train/value_reward_corr": 0.5523747667992205, "objective/train/value_std": 0.01018524169921875, "objective/train/weight_avg": 1.0020960569381714, "objective/train/weighted_lm_loss": 1.8894469738006592, "objective/train/weights_max": 1.1491678953170776, "objective/train/weights_min": 0.3828728199005127, "theoretical_loss": 4.055710915053783, "tokens_seen": 386662400 }, { "epoch": 0.12, "learning_rate": 0.0008917509228053282, "loss": 0.088, "theoretical_loss": 4.055710915053783, "tokens_seen": 386662400 }, { "epoch": 0.12, "learning_rate": 0.0008916706788637457, "loss": 0.0869, "theoretical_loss": 4.055403836570701, "tokens_seen": 386924544 }, { "epoch": 0.12, "learning_rate": 0.0008915904349221634, "loss": 0.0911, "theoretical_loss": 4.05509702427334, "tokens_seen": 387186688 }, { "epoch": 0.12, "learning_rate": 0.0008915101909805809, "loss": 0.0864, "theoretical_loss": 4.054790477750974, "tokens_seen": 387448832 }, { "epoch": 0.12, "learning_rate": 0.0008914299470389986, "loss": 0.0875, "theoretical_loss": 4.054484196593791, "tokens_seen": 387710976 }, { "epoch": 0.12, "learning_rate": 0.0008913497030974162, "loss": 0.0888, "theoretical_loss": 4.054178180392885, "tokens_seen": 387973120 }, { "epoch": 0.12, "learning_rate": 0.0008912694591558338, "loss": 0.0832, "theoretical_loss": 4.053872428740256, "tokens_seen": 388235264 }, { "epoch": 0.12, "learning_rate": 0.0008911892152142514, "loss": 0.0889, "theoretical_loss": 4.053566941228809, "tokens_seen": 388497408 }, { "epoch": 0.12, "learning_rate": 0.0008911089712726689, "loss": 0.0889, "theoretical_loss": 4.053261717452346, "tokens_seen": 388759552 }, { "epoch": 0.12, "learning_rate": 0.0008910287273310865, "loss": 0.0861, "theoretical_loss": 4.052956757005573, "tokens_seen": 389021696 }, { "epoch": 0.12, "learning_rate": 0.0008909484833895041, "loss": 0.086, "theoretical_loss": 4.0526520594840845, "tokens_seen": 389283840 }, { "epoch": 0.12, "learning_rate": 0.0008908682394479217, "loss": 0.0902, "theoretical_loss": 4.052347624484373, "tokens_seen": 389545984 }, { "epoch": 0.12, "learning_rate": 0.0008907879955063392, "loss": 0.087, "theoretical_loss": 4.052043451603818, "tokens_seen": 389808128 }, { "epoch": 0.12, "learning_rate": 0.000890707751564757, "loss": 0.0893, "theoretical_loss": 4.051739540440688, "tokens_seen": 390070272 }, { "epoch": 0.12, "learning_rate": 0.0008906275076231745, "loss": 0.086, "theoretical_loss": 4.0514358905941386, "tokens_seen": 390332416 }, { "epoch": 0.12, "learning_rate": 0.0008905472636815921, "loss": 0.0877, "theoretical_loss": 4.051132501664204, "tokens_seen": 390594560 }, { "epoch": 0.12, "learning_rate": 0.0008904670197400097, "loss": 0.0895, "theoretical_loss": 4.050829373251803, "tokens_seen": 390856704 }, { "epoch": 0.12, "learning_rate": 0.0008903867757984272, "loss": 0.0898, "theoretical_loss": 4.050526504958727, "tokens_seen": 391118848 }, { "epoch": 0.12, "learning_rate": 0.0008903065318568448, "loss": 0.0846, "theoretical_loss": 4.050223896387647, "tokens_seen": 391380992 }, { "epoch": 0.12, "learning_rate": 0.0008902262879152624, "loss": 0.0873, "theoretical_loss": 4.0499215471421035, "tokens_seen": 391643136 }, { "epoch": 0.12, "learning_rate": 0.00089014604397368, "loss": 0.0884, "theoretical_loss": 4.049619456826508, "tokens_seen": 391905280 }, { "epoch": 0.12, "learning_rate": 0.0008900658000320975, "loss": 0.0869, "theoretical_loss": 4.0493176250461405, "tokens_seen": 392167424 }, { "epoch": 0.12, "learning_rate": 0.0008899855560905153, "loss": 0.0874, "theoretical_loss": 4.049016051407145, "tokens_seen": 392429568 }, { "epoch": 0.12, "learning_rate": 0.0008899053121489328, "loss": 0.0863, "theoretical_loss": 4.048714735516527, "tokens_seen": 392691712 }, { "epoch": 0.12, "learning_rate": 0.0008898250682073504, "loss": 0.0904, "theoretical_loss": 4.048413676982155, "tokens_seen": 392953856 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0020599644631147385, "objective/train/docs_used": 149327, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7652794122695923, "objective/train/original_loss": 1.7652791738510132, "objective/train/theoretical_loss": 4.048112875412752, "objective/train/tokens_used": 413676000, "objective/train/value_avg": -0.0103302001953125, "objective/train/value_loss": 0.00044312665704637766, "objective/train/value_max": -0.00033283233642578125, "objective/train/value_min": -0.95361328125, "objective/train/value_reward_corr": 0.6481849685057344, "objective/train/value_std": 0.0183258056640625, "objective/train/weight_avg": 1.002259373664856, "objective/train/weighted_lm_loss": 1.7693637609481812, "objective/train/weights_max": 1.8321107625961304, "objective/train/weights_min": 0.36900386214256287, "theoretical_loss": 4.048112875412752, "tokens_seen": 393216000 }, { "epoch": 0.12, "learning_rate": 0.000889744824265768, "loss": 0.0873, "theoretical_loss": 4.048112875412752, "tokens_seen": 393216000 }, { "epoch": 0.12, "learning_rate": 0.0008896645803241855, "loss": 0.0889, "theoretical_loss": 4.0478123304179, "tokens_seen": 393478144 }, { "epoch": 0.12, "learning_rate": 0.0008895843363826032, "loss": 0.0903, "theoretical_loss": 4.047512041608029, "tokens_seen": 393740288 }, { "epoch": 0.12, "learning_rate": 0.0008895040924410207, "loss": 0.0887, "theoretical_loss": 4.047212008594424, "tokens_seen": 394002432 }, { "epoch": 0.12, "learning_rate": 0.0008894238484994383, "loss": 0.0872, "theoretical_loss": 4.046912230989214, "tokens_seen": 394264576 }, { "epoch": 0.12, "learning_rate": 0.0008893436045578559, "loss": 0.0895, "theoretical_loss": 4.0466127084053785, "tokens_seen": 394526720 }, { "epoch": 0.12, "learning_rate": 0.0008892633606162734, "loss": 0.0879, "theoretical_loss": 4.046313440456733, "tokens_seen": 394788864 }, { "epoch": 0.12, "learning_rate": 0.0008891831166746911, "loss": 0.0888, "theoretical_loss": 4.0460144267579405, "tokens_seen": 395051008 }, { "epoch": 0.12, "learning_rate": 0.0008891028727331087, "loss": 0.0898, "theoretical_loss": 4.045715666924499, "tokens_seen": 395313152 }, { "epoch": 0.12, "learning_rate": 0.0008890226287915263, "loss": 0.0879, "theoretical_loss": 4.045417160572743, "tokens_seen": 395575296 }, { "epoch": 0.12, "learning_rate": 0.0008889423848499438, "loss": 0.0899, "theoretical_loss": 4.045118907319839, "tokens_seen": 395837440 }, { "epoch": 0.12, "learning_rate": 0.0008888621409083615, "loss": 0.0865, "theoretical_loss": 4.04482090678379, "tokens_seen": 396099584 }, { "epoch": 0.12, "learning_rate": 0.000888781896966779, "loss": 0.0908, "theoretical_loss": 4.044523158583421, "tokens_seen": 396361728 }, { "epoch": 0.12, "learning_rate": 0.0008887016530251965, "loss": 0.0891, "theoretical_loss": 4.044225662338388, "tokens_seen": 396623872 }, { "epoch": 0.12, "learning_rate": 0.0008886214090836142, "loss": 0.0882, "theoretical_loss": 4.04392841766917, "tokens_seen": 396886016 }, { "epoch": 0.12, "learning_rate": 0.0008885411651420317, "loss": 0.0861, "theoretical_loss": 4.043631424197067, "tokens_seen": 397148160 }, { "epoch": 0.12, "learning_rate": 0.0008884609212004495, "loss": 0.0896, "theoretical_loss": 4.0433346815442, "tokens_seen": 397410304 }, { "epoch": 0.12, "learning_rate": 0.000888380677258867, "loss": 0.0912, "theoretical_loss": 4.043038189333508, "tokens_seen": 397672448 }, { "epoch": 0.12, "learning_rate": 0.0008883004333172846, "loss": 0.0917, "theoretical_loss": 4.042741947188741, "tokens_seen": 397934592 }, { "epoch": 0.12, "learning_rate": 0.0008882201893757022, "loss": 0.0874, "theoretical_loss": 4.042445954734466, "tokens_seen": 398196736 }, { "epoch": 0.12, "learning_rate": 0.0008881399454341197, "loss": 0.0925, "theoretical_loss": 4.0421502115960575, "tokens_seen": 398458880 }, { "epoch": 0.12, "learning_rate": 0.0008880597014925373, "loss": 0.0852, "theoretical_loss": 4.0418547173997, "tokens_seen": 398721024 }, { "epoch": 0.12, "learning_rate": 0.0008879794575509549, "loss": 0.0883, "theoretical_loss": 4.041559471772382, "tokens_seen": 398983168 }, { "epoch": 0.12, "learning_rate": 0.0008878992136093725, "loss": 0.0904, "theoretical_loss": 4.041264474341896, "tokens_seen": 399245312 }, { "epoch": 0.12, "learning_rate": 0.00088781896966779, "loss": 0.0888, "theoretical_loss": 4.040969724736838, "tokens_seen": 399507456 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.00207437202334404, "objective/train/docs_used": 151634, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6508839130401611, "objective/train/original_loss": 1.6508839130401611, "objective/train/theoretical_loss": 4.040675222586599, "objective/train/tokens_used": 420229600, "objective/train/value_avg": -0.00807952880859375, "objective/train/value_loss": 0.00020788678375538439, "objective/train/value_max": -0.00013136863708496094, "objective/train/value_min": -0.88134765625, "objective/train/value_reward_corr": 0.729374600933435, "objective/train/value_std": 0.015289306640625, "objective/train/weight_avg": 1.0021743774414062, "objective/train/weighted_lm_loss": 1.6536670923233032, "objective/train/weights_max": 1.6075432300567627, "objective/train/weights_min": 0.5353213548660278, "theoretical_loss": 4.040675222586599, "tokens_seen": 399769600 }, { "epoch": 0.12, "learning_rate": 0.0008877387257262078, "loss": 0.086, "theoretical_loss": 4.040675222586599, "tokens_seen": 399769600 }, { "epoch": 0.12, "learning_rate": 0.0008876584817846253, "loss": 0.0851, "theoretical_loss": 4.04038096752137, "tokens_seen": 400031744 }, { "epoch": 0.12, "learning_rate": 0.0008875782378430429, "loss": 0.0882, "theoretical_loss": 4.040086959172136, "tokens_seen": 400293888 }, { "epoch": 0.12, "learning_rate": 0.0008874979939014605, "loss": 0.0843, "theoretical_loss": 4.039793197170672, "tokens_seen": 400556032 }, { "epoch": 0.12, "learning_rate": 0.000887417749959878, "loss": 0.0894, "theoretical_loss": 4.039499681149547, "tokens_seen": 400818176 }, { "epoch": 0.12, "learning_rate": 0.0008873375060182957, "loss": 0.0886, "theoretical_loss": 4.039206410742114, "tokens_seen": 401080320 }, { "epoch": 0.12, "learning_rate": 0.0008872572620767132, "loss": 0.0864, "theoretical_loss": 4.038913385582515, "tokens_seen": 401342464 }, { "epoch": 0.12, "learning_rate": 0.0008871770181351308, "loss": 0.0867, "theoretical_loss": 4.038620605305673, "tokens_seen": 401604608 }, { "epoch": 0.12, "learning_rate": 0.0008870967741935484, "loss": 0.0843, "theoretical_loss": 4.038328069547293, "tokens_seen": 401866752 }, { "epoch": 0.12, "learning_rate": 0.000887016530251966, "loss": 0.0877, "theoretical_loss": 4.03803577794386, "tokens_seen": 402128896 }, { "epoch": 0.12, "learning_rate": 0.0008869362863103836, "loss": 0.0886, "theoretical_loss": 4.037743730132635, "tokens_seen": 402391040 }, { "epoch": 0.12, "learning_rate": 0.0008868560423688012, "loss": 0.085, "theoretical_loss": 4.037451925751654, "tokens_seen": 402653184 }, { "epoch": 0.12, "learning_rate": 0.0008867757984272188, "loss": 0.0875, "theoretical_loss": 4.0371603644397265, "tokens_seen": 402915328 }, { "epoch": 0.12, "learning_rate": 0.0008866955544856363, "loss": 0.0896, "theoretical_loss": 4.03686904583643, "tokens_seen": 403177472 }, { "epoch": 0.12, "learning_rate": 0.000886615310544054, "loss": 0.087, "theoretical_loss": 4.036577969582114, "tokens_seen": 403439616 }, { "epoch": 0.12, "learning_rate": 0.0008865350666024715, "loss": 0.0857, "theoretical_loss": 4.03628713531789, "tokens_seen": 403701760 }, { "epoch": 0.12, "learning_rate": 0.0008864548226608891, "loss": 0.089, "theoretical_loss": 4.035996542685638, "tokens_seen": 403963904 }, { "epoch": 0.12, "learning_rate": 0.0008863745787193067, "loss": 0.0859, "theoretical_loss": 4.0357061913279955, "tokens_seen": 404226048 }, { "epoch": 0.12, "learning_rate": 0.0008862943347777242, "loss": 0.0867, "theoretical_loss": 4.035416080888364, "tokens_seen": 404488192 }, { "epoch": 0.12, "learning_rate": 0.0008862140908361419, "loss": 0.0893, "theoretical_loss": 4.035126211010899, "tokens_seen": 404750336 }, { "epoch": 0.12, "learning_rate": 0.0008861338468945595, "loss": 0.0851, "theoretical_loss": 4.034836581340515, "tokens_seen": 405012480 }, { "epoch": 0.12, "learning_rate": 0.0008860536029529771, "loss": 0.0866, "theoretical_loss": 4.034547191522877, "tokens_seen": 405274624 }, { "epoch": 0.12, "learning_rate": 0.0008859733590113947, "loss": 0.0928, "theoretical_loss": 4.034258041204404, "tokens_seen": 405536768 }, { "epoch": 0.12, "learning_rate": 0.0008858931150698123, "loss": 0.0869, "theoretical_loss": 4.033969130032263, "tokens_seen": 405798912 }, { "epoch": 0.12, "learning_rate": 0.0008858128711282298, "loss": 0.0854, "theoretical_loss": 4.033680457654368, "tokens_seen": 406061056 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.001684014918282628, "objective/train/docs_used": 154148, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6580567359924316, "objective/train/original_loss": 1.6580567359924316, "objective/train/theoretical_loss": 4.0333920237193785, "objective/train/tokens_used": 426783200, "objective/train/value_avg": -0.00681304931640625, "objective/train/value_loss": 0.00015735704801045358, "objective/train/value_max": -0.0002269744873046875, "objective/train/value_min": -0.309326171875, "objective/train/value_reward_corr": 0.6036299621134384, "objective/train/value_std": 0.0090484619140625, "objective/train/weight_avg": 1.0017573833465576, "objective/train/weighted_lm_loss": 1.6613595485687256, "objective/train/weights_max": 1.1686903238296509, "objective/train/weights_min": 0.36919885873794556, "theoretical_loss": 4.0333920237193785, "tokens_seen": 406323200 }, { "epoch": 0.12, "learning_rate": 0.0008857326271866474, "loss": 0.0855, "theoretical_loss": 4.0333920237193785, "tokens_seen": 406323200 }, { "epoch": 0.12, "learning_rate": 0.000885652383245065, "loss": 0.0818, "theoretical_loss": 4.0331038278766975, "tokens_seen": 406585344 }, { "epoch": 0.12, "learning_rate": 0.0008855721393034825, "loss": 0.0891, "theoretical_loss": 4.032815869776471, "tokens_seen": 406847488 }, { "epoch": 0.12, "learning_rate": 0.0008854918953619003, "loss": 0.0883, "theoretical_loss": 4.032528149069579, "tokens_seen": 407109632 }, { "epoch": 0.12, "learning_rate": 0.0008854116514203178, "loss": 0.0888, "theoretical_loss": 4.0322406654076435, "tokens_seen": 407371776 }, { "epoch": 0.12, "learning_rate": 0.0008853314074787354, "loss": 0.0886, "theoretical_loss": 4.0319534184430195, "tokens_seen": 407633920 }, { "epoch": 0.12, "learning_rate": 0.000885251163537153, "loss": 0.0864, "theoretical_loss": 4.031666407828795, "tokens_seen": 407896064 }, { "epoch": 0.12, "learning_rate": 0.0008851709195955705, "loss": 0.0863, "theoretical_loss": 4.03137963321879, "tokens_seen": 408158208 }, { "epoch": 0.12, "learning_rate": 0.0008850906756539881, "loss": 0.0886, "theoretical_loss": 4.0310930942675505, "tokens_seen": 408420352 }, { "epoch": 0.12, "learning_rate": 0.0008850104317124057, "loss": 0.0875, "theoretical_loss": 4.030806790630353, "tokens_seen": 408682496 }, { "epoch": 0.12, "learning_rate": 0.0008849301877708233, "loss": 0.0851, "theoretical_loss": 4.030520721963199, "tokens_seen": 408944640 }, { "epoch": 0.12, "learning_rate": 0.0008848499438292408, "loss": 0.0853, "theoretical_loss": 4.030234887922808, "tokens_seen": 409206784 }, { "epoch": 0.12, "learning_rate": 0.0008847696998876586, "loss": 0.0874, "theoretical_loss": 4.029949288166627, "tokens_seen": 409468928 }, { "epoch": 0.12, "learning_rate": 0.0008846894559460761, "loss": 0.0822, "theoretical_loss": 4.0296639223528175, "tokens_seen": 409731072 }, { "epoch": 0.12, "learning_rate": 0.0008846092120044937, "loss": 0.0858, "theoretical_loss": 4.029378790140261, "tokens_seen": 409993216 }, { "epoch": 0.12, "learning_rate": 0.0008845289680629113, "loss": 0.0862, "theoretical_loss": 4.029093891188552, "tokens_seen": 410255360 }, { "epoch": 0.12, "learning_rate": 0.0008844487241213288, "loss": 0.0876, "theoretical_loss": 4.028809225158, "tokens_seen": 410517504 }, { "epoch": 0.12, "learning_rate": 0.0008843684801797465, "loss": 0.0856, "theoretical_loss": 4.028524791709621, "tokens_seen": 410779648 }, { "epoch": 0.12, "learning_rate": 0.000884288236238164, "loss": 0.0863, "theoretical_loss": 4.028240590505148, "tokens_seen": 411041792 }, { "epoch": 0.12, "learning_rate": 0.0008842079922965816, "loss": 0.0859, "theoretical_loss": 4.027956621207015, "tokens_seen": 411303936 }, { "epoch": 0.12, "learning_rate": 0.0008841277483549992, "loss": 0.0858, "theoretical_loss": 4.027672883478364, "tokens_seen": 411566080 }, { "epoch": 0.12, "learning_rate": 0.0008840475044134169, "loss": 0.0892, "theoretical_loss": 4.027389376983041, "tokens_seen": 411828224 }, { "epoch": 0.12, "learning_rate": 0.0008839672604718344, "loss": 0.0893, "theoretical_loss": 4.02710610138559, "tokens_seen": 412090368 }, { "epoch": 0.12, "learning_rate": 0.000883887016530252, "loss": 0.0863, "theoretical_loss": 4.02682305635126, "tokens_seen": 412352512 }, { "epoch": 0.13, "learning_rate": 0.0008838067725886696, "loss": 0.0875, "theoretical_loss": 4.026540241545994, "tokens_seen": 412614656 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.00014230117085389793, "objective/train/docs_used": 156351, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.629351019859314, "objective/train/original_loss": 1.6293511390686035, "objective/train/theoretical_loss": 4.026257656636431, "objective/train/tokens_used": 433336800, "objective/train/value_avg": -0.00789642333984375, "objective/train/value_loss": 0.0005366242257878184, "objective/train/value_max": -0.00022172927856445312, "objective/train/value_min": -0.9091796875, "objective/train/value_reward_corr": 0.6416809694837178, "objective/train/value_std": 0.017822265625, "objective/train/weight_avg": 1.0003734827041626, "objective/train/weighted_lm_loss": 1.628688931465149, "objective/train/weights_max": 1.8231868743896484, "objective/train/weights_min": 0.36822012066841125, "theoretical_loss": 4.026257656636431, "tokens_seen": 412876800 }, { "epoch": 0.13, "learning_rate": 0.0008837265286470871, "loss": 0.0856, "theoretical_loss": 4.026257656636431, "tokens_seen": 412876800 }, { "epoch": 0.13, "learning_rate": 0.0008836462847055048, "loss": 0.0869, "theoretical_loss": 4.025975301289906, "tokens_seen": 413138944 }, { "epoch": 0.13, "learning_rate": 0.0008835660407639223, "loss": 0.0864, "theoretical_loss": 4.025693175174443, "tokens_seen": 413401088 }, { "epoch": 0.13, "learning_rate": 0.00088348579682234, "loss": 0.0836, "theoretical_loss": 4.02541127795876, "tokens_seen": 413663232 }, { "epoch": 0.13, "learning_rate": 0.0008834055528807575, "loss": 0.0861, "theoretical_loss": 4.02512960931226, "tokens_seen": 413925376 }, { "epoch": 0.13, "learning_rate": 0.000883325308939175, "loss": 0.0831, "theoretical_loss": 4.024848168905035, "tokens_seen": 414187520 }, { "epoch": 0.13, "learning_rate": 0.0008832450649975928, "loss": 0.0864, "theoretical_loss": 4.02456695640786, "tokens_seen": 414449664 }, { "epoch": 0.13, "learning_rate": 0.0008831648210560103, "loss": 0.0855, "theoretical_loss": 4.0242859714921915, "tokens_seen": 414711808 }, { "epoch": 0.13, "learning_rate": 0.0008830845771144279, "loss": 0.0886, "theoretical_loss": 4.024005213830171, "tokens_seen": 414973952 }, { "epoch": 0.13, "learning_rate": 0.0008830043331728455, "loss": 0.0833, "theoretical_loss": 4.023724683094615, "tokens_seen": 415236096 }, { "epoch": 0.13, "learning_rate": 0.0008829240892312631, "loss": 0.0833, "theoretical_loss": 4.023444378959019, "tokens_seen": 415498240 }, { "epoch": 0.13, "learning_rate": 0.0008828438452896806, "loss": 0.085, "theoretical_loss": 4.023164301097555, "tokens_seen": 415760384 }, { "epoch": 0.13, "learning_rate": 0.0008827636013480982, "loss": 0.0854, "theoretical_loss": 4.0228844491850655, "tokens_seen": 416022528 }, { "epoch": 0.13, "learning_rate": 0.0008826833574065158, "loss": 0.0873, "theoretical_loss": 4.022604822897068, "tokens_seen": 416284672 }, { "epoch": 0.13, "learning_rate": 0.0008826031134649333, "loss": 0.0872, "theoretical_loss": 4.0223254219097475, "tokens_seen": 416546816 }, { "epoch": 0.13, "learning_rate": 0.0008825228695233511, "loss": 0.0875, "theoretical_loss": 4.022046245899958, "tokens_seen": 416808960 }, { "epoch": 0.13, "learning_rate": 0.0008824426255817686, "loss": 0.0856, "theoretical_loss": 4.021767294545221, "tokens_seen": 417071104 }, { "epoch": 0.13, "learning_rate": 0.0008823623816401862, "loss": 0.0872, "theoretical_loss": 4.021488567523721, "tokens_seen": 417333248 }, { "epoch": 0.13, "learning_rate": 0.0008822821376986038, "loss": 0.087, "theoretical_loss": 4.021210064514305, "tokens_seen": 417595392 }, { "epoch": 0.13, "learning_rate": 0.0008822018937570213, "loss": 0.0833, "theoretical_loss": 4.020931785196484, "tokens_seen": 417857536 }, { "epoch": 0.13, "learning_rate": 0.000882121649815439, "loss": 0.0849, "theoretical_loss": 4.020653729250424, "tokens_seen": 418119680 }, { "epoch": 0.13, "learning_rate": 0.0008820414058738565, "loss": 0.0847, "theoretical_loss": 4.020375896356951, "tokens_seen": 418381824 }, { "epoch": 0.13, "learning_rate": 0.0008819611619322741, "loss": 0.0905, "theoretical_loss": 4.0200982861975465, "tokens_seen": 418643968 }, { "epoch": 0.13, "learning_rate": 0.0008818809179906917, "loss": 0.0879, "theoretical_loss": 4.019820898454345, "tokens_seen": 418906112 }, { "epoch": 0.13, "learning_rate": 0.0008818006740491094, "loss": 0.0838, "theoretical_loss": 4.019543732810134, "tokens_seen": 419168256 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0006775321089662611, "objective/train/docs_used": 158751, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7296289205551147, "objective/train/original_loss": 1.7296291589736938, "objective/train/theoretical_loss": 4.019266788948352, "objective/train/tokens_used": 439890400, "objective/train/value_avg": -0.00728607177734375, "objective/train/value_loss": 0.0003692489117383957, "objective/train/value_max": -0.0002613067626953125, "objective/train/value_min": -0.51171875, "objective/train/value_reward_corr": 0.557088638783727, "objective/train/value_std": 0.01107025146484375, "objective/train/weight_avg": 1.0008379220962524, "objective/train/weighted_lm_loss": 1.7313810586929321, "objective/train/weights_max": 1.540909767150879, "objective/train/weights_min": 0.373028963804245, "theoretical_loss": 4.019266788948352, "tokens_seen": 419430400 }, { "epoch": 0.13, "learning_rate": 0.0008817204301075269, "loss": 0.0871, "theoretical_loss": 4.019266788948352, "tokens_seen": 419430400 }, { "epoch": 0.13, "learning_rate": 0.0008816401861659445, "loss": 0.0896, "theoretical_loss": 4.0189900665530836, "tokens_seen": 419692544 }, { "epoch": 0.13, "learning_rate": 0.0008815599422243621, "loss": 0.0877, "theoretical_loss": 4.0187135653090635, "tokens_seen": 419954688 }, { "epoch": 0.13, "learning_rate": 0.0008814796982827796, "loss": 0.0827, "theoretical_loss": 4.018437284901671, "tokens_seen": 420216832 }, { "epoch": 0.13, "learning_rate": 0.0008813994543411973, "loss": 0.0896, "theoretical_loss": 4.018161225016926, "tokens_seen": 420478976 }, { "epoch": 0.13, "learning_rate": 0.0008813192103996148, "loss": 0.0875, "theoretical_loss": 4.0178853853414935, "tokens_seen": 420741120 }, { "epoch": 0.13, "learning_rate": 0.0008812389664580324, "loss": 0.087, "theoretical_loss": 4.017609765562678, "tokens_seen": 421003264 }, { "epoch": 0.13, "learning_rate": 0.00088115872251645, "loss": 0.0871, "theoretical_loss": 4.017334365368422, "tokens_seen": 421265408 }, { "epoch": 0.13, "learning_rate": 0.0008810784785748677, "loss": 0.0888, "theoretical_loss": 4.017059184447303, "tokens_seen": 421527552 }, { "epoch": 0.13, "learning_rate": 0.0008809982346332853, "loss": 0.0875, "theoretical_loss": 4.0167842224885355, "tokens_seen": 421789696 }, { "epoch": 0.13, "learning_rate": 0.0008809179906917028, "loss": 0.0827, "theoretical_loss": 4.016509479181968, "tokens_seen": 422051840 }, { "epoch": 0.13, "learning_rate": 0.0008808377467501204, "loss": 0.0861, "theoretical_loss": 4.016234954218078, "tokens_seen": 422313984 }, { "epoch": 0.13, "learning_rate": 0.000880757502808538, "loss": 0.0855, "theoretical_loss": 4.015960647287975, "tokens_seen": 422576128 }, { "epoch": 0.13, "learning_rate": 0.0008806772588669556, "loss": 0.0874, "theoretical_loss": 4.015686558083396, "tokens_seen": 422838272 }, { "epoch": 0.13, "learning_rate": 0.0008805970149253731, "loss": 0.0878, "theoretical_loss": 4.015412686296704, "tokens_seen": 423100416 }, { "epoch": 0.13, "learning_rate": 0.0008805167709837908, "loss": 0.0869, "theoretical_loss": 4.0151390316208895, "tokens_seen": 423362560 }, { "epoch": 0.13, "learning_rate": 0.0008804365270422083, "loss": 0.0872, "theoretical_loss": 4.014865593749563, "tokens_seen": 423624704 }, { "epoch": 0.13, "learning_rate": 0.0008803562831006258, "loss": 0.0852, "theoretical_loss": 4.014592372376958, "tokens_seen": 423886848 }, { "epoch": 0.13, "learning_rate": 0.0008802760391590436, "loss": 0.0871, "theoretical_loss": 4.014319367197928, "tokens_seen": 424148992 }, { "epoch": 0.13, "learning_rate": 0.0008801957952174611, "loss": 0.0905, "theoretical_loss": 4.014046577907946, "tokens_seen": 424411136 }, { "epoch": 0.13, "learning_rate": 0.0008801155512758787, "loss": 0.0886, "theoretical_loss": 4.013774004203099, "tokens_seen": 424673280 }, { "epoch": 0.13, "learning_rate": 0.0008800353073342963, "loss": 0.0873, "theoretical_loss": 4.013501645780092, "tokens_seen": 424935424 }, { "epoch": 0.13, "learning_rate": 0.0008799550633927139, "loss": 0.0835, "theoretical_loss": 4.013229502336242, "tokens_seen": 425197568 }, { "epoch": 0.13, "learning_rate": 0.0008798748194511314, "loss": 0.0863, "theoretical_loss": 4.0129575735694765, "tokens_seen": 425459712 }, { "epoch": 0.13, "learning_rate": 0.000879794575509549, "loss": 0.0891, "theoretical_loss": 4.012685859178337, "tokens_seen": 425721856 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.001993941143155098, "objective/train/docs_used": 161088, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6738786697387695, "objective/train/original_loss": 1.6738786697387695, "objective/train/theoretical_loss": 4.012414358861969, "objective/train/tokens_used": 446444000, "objective/train/value_avg": -0.0080413818359375, "objective/train/value_loss": 0.00036461447598412633, "objective/train/value_max": -0.00036263465881347656, "objective/train/value_min": -0.89501953125, "objective/train/value_reward_corr": 0.7342816416076462, "objective/train/value_std": 0.018157958984375, "objective/train/weight_avg": 1.0021560192108154, "objective/train/weighted_lm_loss": 1.676504373550415, "objective/train/weights_max": 1.6657140254974365, "objective/train/weights_min": 0.3717136085033417, "theoretical_loss": 4.012414358861969, "tokens_seen": 425984000 }, { "epoch": 0.13, "learning_rate": 0.0008797143315679666, "loss": 0.0842, "theoretical_loss": 4.012414358861969, "tokens_seen": 425984000 }, { "epoch": 0.13, "learning_rate": 0.0008796340876263842, "loss": 0.0868, "theoretical_loss": 4.01214307232013, "tokens_seen": 426246144 }, { "epoch": 0.13, "learning_rate": 0.0008795538436848019, "loss": 0.0863, "theoretical_loss": 4.011871999253178, "tokens_seen": 426508288 }, { "epoch": 0.13, "learning_rate": 0.0008794735997432194, "loss": 0.0883, "theoretical_loss": 4.011601139362078, "tokens_seen": 426770432 }, { "epoch": 0.13, "learning_rate": 0.0008793933558016371, "loss": 0.0852, "theoretical_loss": 4.011330492348397, "tokens_seen": 427032576 }, { "epoch": 0.13, "learning_rate": 0.0008793131118600546, "loss": 0.0891, "theoretical_loss": 4.0110600579143, "tokens_seen": 427294720 }, { "epoch": 0.13, "learning_rate": 0.0008792328679184721, "loss": 0.0866, "theoretical_loss": 4.010789835762555, "tokens_seen": 427556864 }, { "epoch": 0.13, "learning_rate": 0.0008791526239768898, "loss": 0.0888, "theoretical_loss": 4.010519825596525, "tokens_seen": 427819008 }, { "epoch": 0.13, "learning_rate": 0.0008790723800353073, "loss": 0.086, "theoretical_loss": 4.010250027120169, "tokens_seen": 428081152 }, { "epoch": 0.13, "learning_rate": 0.0008789921360937249, "loss": 0.0852, "theoretical_loss": 4.009980440038043, "tokens_seen": 428343296 }, { "epoch": 0.13, "learning_rate": 0.0008789118921521425, "loss": 0.0889, "theoretical_loss": 4.009711064055291, "tokens_seen": 428605440 }, { "epoch": 0.13, "learning_rate": 0.0008788316482105602, "loss": 0.0852, "theoretical_loss": 4.009441898877652, "tokens_seen": 428867584 }, { "epoch": 0.13, "learning_rate": 0.0008787514042689777, "loss": 0.0852, "theoretical_loss": 4.009172944211455, "tokens_seen": 429129728 }, { "epoch": 0.13, "learning_rate": 0.0008786711603273953, "loss": 0.087, "theoretical_loss": 4.008904199763615, "tokens_seen": 429391872 }, { "epoch": 0.13, "learning_rate": 0.0008785909163858129, "loss": 0.0851, "theoretical_loss": 4.008635665241635, "tokens_seen": 429654016 }, { "epoch": 0.13, "learning_rate": 0.0008785106724442304, "loss": 0.0883, "theoretical_loss": 4.008367340353602, "tokens_seen": 429916160 }, { "epoch": 0.13, "learning_rate": 0.0008784304285026481, "loss": 0.0863, "theoretical_loss": 4.008099224808188, "tokens_seen": 430178304 }, { "epoch": 0.13, "learning_rate": 0.0008783501845610656, "loss": 0.0837, "theoretical_loss": 4.007831318314645, "tokens_seen": 430440448 }, { "epoch": 0.13, "learning_rate": 0.0008782699406194833, "loss": 0.0846, "theoretical_loss": 4.00756362058281, "tokens_seen": 430702592 }, { "epoch": 0.13, "learning_rate": 0.0008781896966779008, "loss": 0.0839, "theoretical_loss": 4.007296131323094, "tokens_seen": 430964736 }, { "epoch": 0.13, "learning_rate": 0.0008781094527363185, "loss": 0.0866, "theoretical_loss": 4.007028850246487, "tokens_seen": 431226880 }, { "epoch": 0.13, "learning_rate": 0.0008780292087947361, "loss": 0.0861, "theoretical_loss": 4.006761777064557, "tokens_seen": 431489024 }, { "epoch": 0.13, "learning_rate": 0.0008779489648531536, "loss": 0.0854, "theoretical_loss": 4.006494911489444, "tokens_seen": 431751168 }, { "epoch": 0.13, "learning_rate": 0.0008778687209115712, "loss": 0.086, "theoretical_loss": 4.006228253233864, "tokens_seen": 432013312 }, { "epoch": 0.13, "learning_rate": 0.0008777884769699888, "loss": 0.088, "theoretical_loss": 4.0059618020111, "tokens_seen": 432275456 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0015352979535236955, "objective/train/docs_used": 163281, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.734284520149231, "objective/train/original_loss": 1.7342844009399414, "objective/train/theoretical_loss": 4.00569555753501, "objective/train/tokens_used": 452997600, "objective/train/value_avg": -0.006992340087890625, "objective/train/value_loss": 0.00020126513845752925, "objective/train/value_max": -0.0002913475036621094, "objective/train/value_min": -0.54833984375, "objective/train/value_reward_corr": 0.5731349995282375, "objective/train/value_std": 0.01129913330078125, "objective/train/weight_avg": 1.0016310214996338, "objective/train/weighted_lm_loss": 1.7371350526809692, "objective/train/weights_max": 1.6854356527328491, "objective/train/weights_min": 0.3728724420070648, "theoretical_loss": 4.00569555753501, "tokens_seen": 432537600 }, { "epoch": 0.13, "learning_rate": 0.0008777082330284064, "loss": 0.0868, "theoretical_loss": 4.00569555753501, "tokens_seen": 432537600 }, { "epoch": 0.13, "learning_rate": 0.0008776279890868239, "loss": 0.0854, "theoretical_loss": 4.0054295195200185, "tokens_seen": 432799744 }, { "epoch": 0.13, "learning_rate": 0.0008775477451452416, "loss": 0.085, "theoretical_loss": 4.005163687681116, "tokens_seen": 433061888 }, { "epoch": 0.13, "learning_rate": 0.0008774675012036591, "loss": 0.085, "theoretical_loss": 4.00489806173386, "tokens_seen": 433324032 }, { "epoch": 0.13, "learning_rate": 0.0008773872572620766, "loss": 0.0889, "theoretical_loss": 4.004632641394372, "tokens_seen": 433586176 }, { "epoch": 0.13, "learning_rate": 0.0008773070133204944, "loss": 0.0868, "theoretical_loss": 4.0043674263793365, "tokens_seen": 433848320 }, { "epoch": 0.13, "learning_rate": 0.0008772267693789119, "loss": 0.0842, "theoretical_loss": 4.004102416405998, "tokens_seen": 434110464 }, { "epoch": 0.13, "learning_rate": 0.0008771465254373296, "loss": 0.0853, "theoretical_loss": 4.0038376111921625, "tokens_seen": 434372608 }, { "epoch": 0.13, "learning_rate": 0.0008770662814957471, "loss": 0.0872, "theoretical_loss": 4.0035730104561935, "tokens_seen": 434634752 }, { "epoch": 0.13, "learning_rate": 0.0008769860375541647, "loss": 0.0888, "theoretical_loss": 4.003308613917012, "tokens_seen": 434896896 }, { "epoch": 0.13, "learning_rate": 0.0008769057936125823, "loss": 0.0873, "theoretical_loss": 4.003044421294094, "tokens_seen": 435159040 }, { "epoch": 0.13, "learning_rate": 0.0008768255496709998, "loss": 0.0852, "theoretical_loss": 4.002780432307468, "tokens_seen": 435421184 }, { "epoch": 0.13, "learning_rate": 0.0008767453057294174, "loss": 0.0853, "theoretical_loss": 4.0025166466777184, "tokens_seen": 435683328 }, { "epoch": 0.13, "learning_rate": 0.000876665061787835, "loss": 0.0868, "theoretical_loss": 4.00225306412598, "tokens_seen": 435945472 }, { "epoch": 0.13, "learning_rate": 0.0008765848178462527, "loss": 0.0869, "theoretical_loss": 4.001989684373934, "tokens_seen": 436207616 }, { "epoch": 0.13, "learning_rate": 0.0008765045739046702, "loss": 0.0863, "theoretical_loss": 4.0017265071438155, "tokens_seen": 436469760 }, { "epoch": 0.13, "learning_rate": 0.0008764243299630879, "loss": 0.0863, "theoretical_loss": 4.001463532158402, "tokens_seen": 436731904 }, { "epoch": 0.13, "learning_rate": 0.0008763440860215054, "loss": 0.0864, "theoretical_loss": 4.001200759141019, "tokens_seen": 436994048 }, { "epoch": 0.13, "learning_rate": 0.0008762638420799229, "loss": 0.0857, "theoretical_loss": 4.000938187815535, "tokens_seen": 437256192 }, { "epoch": 0.13, "learning_rate": 0.0008761835981383406, "loss": 0.0852, "theoretical_loss": 4.000675817906362, "tokens_seen": 437518336 }, { "epoch": 0.13, "learning_rate": 0.0008761033541967581, "loss": 0.0852, "theoretical_loss": 4.000413649138453, "tokens_seen": 437780480 }, { "epoch": 0.13, "learning_rate": 0.0008760231102551757, "loss": 0.0871, "theoretical_loss": 4.000151681237301, "tokens_seen": 438042624 }, { "epoch": 0.13, "learning_rate": 0.0008759428663135933, "loss": 0.0851, "theoretical_loss": 3.9998899139289392, "tokens_seen": 438304768 }, { "epoch": 0.13, "learning_rate": 0.000875862622372011, "loss": 0.0855, "theoretical_loss": 3.999628346939934, "tokens_seen": 438566912 }, { "epoch": 0.13, "learning_rate": 0.0008757823784304286, "loss": 0.088, "theoretical_loss": 3.9993669799973928, "tokens_seen": 438829056 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0022315175738185644, "objective/train/docs_used": 165818, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.725619912147522, "objective/train/original_loss": 1.725620150566101, "objective/train/theoretical_loss": 3.9991058128289536, "objective/train/tokens_used": 459551200, "objective/train/value_avg": -0.009033203125, "objective/train/value_loss": 0.0003572371497284621, "objective/train/value_max": -0.0001780986785888672, "objective/train/value_min": -0.8076171875, "objective/train/value_reward_corr": 0.7491131113466319, "objective/train/value_std": 0.020477294921875, "objective/train/weight_avg": 1.002398133277893, "objective/train/weighted_lm_loss": 1.7294703722000122, "objective/train/weights_max": 1.4387959241867065, "objective/train/weights_min": 0.41180431842803955, "theoretical_loss": 3.9991058128289536, "tokens_seen": 439091200 }, { "epoch": 0.13, "learning_rate": 0.0008757021344888461, "loss": 0.0852, "theoretical_loss": 3.9991058128289536, "tokens_seen": 439091200 }, { "epoch": 0.13, "learning_rate": 0.0008756218905472637, "loss": 0.0893, "theoretical_loss": 3.998844845162789, "tokens_seen": 439353344 }, { "epoch": 0.13, "learning_rate": 0.0008755416466056813, "loss": 0.0869, "theoretical_loss": 3.998584076727604, "tokens_seen": 439615488 }, { "epoch": 0.13, "learning_rate": 0.0008754614026640989, "loss": 0.0882, "theoretical_loss": 3.998323507252633, "tokens_seen": 439877632 }, { "epoch": 0.13, "learning_rate": 0.0008753811587225164, "loss": 0.0848, "theoretical_loss": 3.998063136467639, "tokens_seen": 440139776 }, { "epoch": 0.13, "learning_rate": 0.0008753009147809341, "loss": 0.0875, "theoretical_loss": 3.9978029641029154, "tokens_seen": 440401920 }, { "epoch": 0.13, "learning_rate": 0.0008752206708393516, "loss": 0.0858, "theoretical_loss": 3.9975429898892783, "tokens_seen": 440664064 }, { "epoch": 0.13, "learning_rate": 0.0008751404268977691, "loss": 0.0866, "theoretical_loss": 3.9972832135580707, "tokens_seen": 440926208 }, { "epoch": 0.13, "learning_rate": 0.0008750601829561869, "loss": 0.0863, "theoretical_loss": 3.9970236348411605, "tokens_seen": 441188352 }, { "epoch": 0.13, "learning_rate": 0.0008749799390146044, "loss": 0.0866, "theoretical_loss": 3.996764253470935, "tokens_seen": 441450496 }, { "epoch": 0.13, "learning_rate": 0.000874899695073022, "loss": 0.0821, "theoretical_loss": 3.996505069180306, "tokens_seen": 441712640 }, { "epoch": 0.13, "learning_rate": 0.0008748194511314396, "loss": 0.0869, "theoretical_loss": 3.9962460817027017, "tokens_seen": 441974784 }, { "epoch": 0.13, "learning_rate": 0.0008747392071898572, "loss": 0.0861, "theoretical_loss": 3.995987290772071, "tokens_seen": 442236928 }, { "epoch": 0.13, "learning_rate": 0.0008746589632482748, "loss": 0.0853, "theoretical_loss": 3.9957286961228786, "tokens_seen": 442499072 }, { "epoch": 0.13, "learning_rate": 0.0008745787193066924, "loss": 0.0835, "theoretical_loss": 3.995470297490106, "tokens_seen": 442761216 }, { "epoch": 0.13, "learning_rate": 0.0008744984753651099, "loss": 0.0836, "theoretical_loss": 3.995212094609249, "tokens_seen": 443023360 }, { "epoch": 0.13, "learning_rate": 0.0008744182314235275, "loss": 0.0854, "theoretical_loss": 3.994954087216315, "tokens_seen": 443285504 }, { "epoch": 0.13, "learning_rate": 0.0008743379874819452, "loss": 0.0881, "theoretical_loss": 3.994696275047825, "tokens_seen": 443547648 }, { "epoch": 0.13, "learning_rate": 0.0008742577435403627, "loss": 0.0857, "theoretical_loss": 3.9944386578408113, "tokens_seen": 443809792 }, { "epoch": 0.13, "learning_rate": 0.0008741774995987804, "loss": 0.0838, "theoretical_loss": 3.9941812353328126, "tokens_seen": 444071936 }, { "epoch": 0.13, "learning_rate": 0.0008740972556571979, "loss": 0.0846, "theoretical_loss": 3.993924007261878, "tokens_seen": 444334080 }, { "epoch": 0.13, "learning_rate": 0.0008740170117156155, "loss": 0.0878, "theoretical_loss": 3.9936669733665617, "tokens_seen": 444596224 }, { "epoch": 0.13, "learning_rate": 0.0008739367677740331, "loss": 0.0857, "theoretical_loss": 3.9934101333859253, "tokens_seen": 444858368 }, { "epoch": 0.13, "learning_rate": 0.0008738565238324506, "loss": 0.0878, "theoretical_loss": 3.9931534870595327, "tokens_seen": 445120512 }, { "epoch": 0.13, "learning_rate": 0.0008737762798908682, "loss": 0.0852, "theoretical_loss": 3.9928970341274517, "tokens_seen": 445382656 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0004395976720843464, "objective/train/docs_used": 168097, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7950403690338135, "objective/train/original_loss": 1.795040249824524, "objective/train/theoretical_loss": 3.992640774330251, "objective/train/tokens_used": 466104800, "objective/train/value_avg": -0.00882720947265625, "objective/train/value_loss": 0.0003522172919474542, "objective/train/value_max": -0.0001823902130126953, "objective/train/value_min": -0.646484375, "objective/train/value_reward_corr": 0.7499370479136089, "objective/train/value_std": 0.0190887451171875, "objective/train/weight_avg": 1.00059974193573, "objective/train/weighted_lm_loss": 1.795857548713684, "objective/train/weights_max": 1.362145185470581, "objective/train/weights_min": 0.3820091187953949, "theoretical_loss": 3.992640774330251, "tokens_seen": 445644800 }, { "epoch": 0.14, "learning_rate": 0.0008736960359492858, "loss": 0.0857, "theoretical_loss": 3.992640774330251, "tokens_seen": 445644800 }, { "epoch": 0.14, "learning_rate": 0.0008736157920077035, "loss": 0.0836, "theoretical_loss": 3.9923847074090015, "tokens_seen": 445906944 }, { "epoch": 0.14, "learning_rate": 0.000873535548066121, "loss": 0.0831, "theoretical_loss": 3.9921288331052702, "tokens_seen": 446169088 }, { "epoch": 0.14, "learning_rate": 0.0008734553041245387, "loss": 0.0851, "theoretical_loss": 3.991873151161124, "tokens_seen": 446431232 }, { "epoch": 0.14, "learning_rate": 0.0008733750601829562, "loss": 0.0882, "theoretical_loss": 3.9916176613191263, "tokens_seen": 446693376 }, { "epoch": 0.14, "learning_rate": 0.0008732948162413738, "loss": 0.0857, "theoretical_loss": 3.9913623633223354, "tokens_seen": 446955520 }, { "epoch": 0.14, "learning_rate": 0.0008732145722997914, "loss": 0.0828, "theoretical_loss": 3.9911072569143036, "tokens_seen": 447217664 }, { "epoch": 0.14, "learning_rate": 0.0008731343283582089, "loss": 0.084, "theoretical_loss": 3.9908523418390764, "tokens_seen": 447479808 }, { "epoch": 0.14, "learning_rate": 0.0008730540844166266, "loss": 0.084, "theoretical_loss": 3.990597617841191, "tokens_seen": 447741952 }, { "epoch": 0.14, "learning_rate": 0.0008729738404750441, "loss": 0.0838, "theoretical_loss": 3.9903430846656742, "tokens_seen": 448004096 }, { "epoch": 0.14, "learning_rate": 0.0008728935965334618, "loss": 0.0868, "theoretical_loss": 3.990088742058043, "tokens_seen": 448266240 }, { "epoch": 0.14, "learning_rate": 0.0008728133525918794, "loss": 0.0831, "theoretical_loss": 3.9898345897643024, "tokens_seen": 448528384 }, { "epoch": 0.14, "learning_rate": 0.0008727331086502969, "loss": 0.0829, "theoretical_loss": 3.989580627530943, "tokens_seen": 448790528 }, { "epoch": 0.14, "learning_rate": 0.0008726528647087145, "loss": 0.0831, "theoretical_loss": 3.9893268551049417, "tokens_seen": 449052672 }, { "epoch": 0.14, "learning_rate": 0.0008725726207671321, "loss": 0.0845, "theoretical_loss": 3.9890732722337594, "tokens_seen": 449314816 }, { "epoch": 0.14, "learning_rate": 0.0008724923768255497, "loss": 0.0877, "theoretical_loss": 3.988819878665341, "tokens_seen": 449576960 }, { "epoch": 0.14, "learning_rate": 0.0008724121328839672, "loss": 0.0846, "theoretical_loss": 3.988566674148111, "tokens_seen": 449839104 }, { "epoch": 0.14, "learning_rate": 0.0008723318889423849, "loss": 0.0849, "theoretical_loss": 3.988313658430978, "tokens_seen": 450101248 }, { "epoch": 0.14, "learning_rate": 0.0008722516450008024, "loss": 0.0867, "theoretical_loss": 3.9880608312633274, "tokens_seen": 450363392 }, { "epoch": 0.14, "learning_rate": 0.0008721714010592199, "loss": 0.0871, "theoretical_loss": 3.9878081923950237, "tokens_seen": 450625536 }, { "epoch": 0.14, "learning_rate": 0.0008720911571176377, "loss": 0.0848, "theoretical_loss": 3.9875557415764087, "tokens_seen": 450887680 }, { "epoch": 0.14, "learning_rate": 0.0008720109131760552, "loss": 0.083, "theoretical_loss": 3.9873034785582995, "tokens_seen": 451149824 }, { "epoch": 0.14, "learning_rate": 0.0008719306692344729, "loss": 0.0824, "theoretical_loss": 3.9870514030919884, "tokens_seen": 451411968 }, { "epoch": 0.14, "learning_rate": 0.0008718504252928904, "loss": 0.0843, "theoretical_loss": 3.986799514929242, "tokens_seen": 451674112 }, { "epoch": 0.14, "learning_rate": 0.000871770181351308, "loss": 0.087, "theoretical_loss": 3.9865478138222974, "tokens_seen": 451936256 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0014793577138334513, "objective/train/docs_used": 170598, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7309378385543823, "objective/train/original_loss": 1.7309377193450928, "objective/train/theoretical_loss": 3.9862962995238647, "objective/train/tokens_used": 472658400, "objective/train/value_avg": -0.00775146484375, "objective/train/value_loss": 0.0004800742899533361, "objective/train/value_max": -0.00023412704467773438, "objective/train/value_min": -0.78662109375, "objective/train/value_reward_corr": 0.5517175996070474, "objective/train/value_std": 0.0128021240234375, "objective/train/weight_avg": 1.0016835927963257, "objective/train/weighted_lm_loss": 1.7332364320755005, "objective/train/weights_max": 2.0833756923675537, "objective/train/weights_min": 0.368600070476532, "theoretical_loss": 3.9862962995238647, "tokens_seen": 452198400 }, { "epoch": 0.14, "learning_rate": 0.0008716899374097256, "loss": 0.0856, "theoretical_loss": 3.9862962995238647, "tokens_seen": 452198400 }, { "epoch": 0.14, "learning_rate": 0.0008716096934681432, "loss": 0.0879, "theoretical_loss": 3.9860449717871234, "tokens_seen": 452460544 }, { "epoch": 0.14, "learning_rate": 0.0008715294495265607, "loss": 0.0847, "theoretical_loss": 3.9857938303657217, "tokens_seen": 452722688 }, { "epoch": 0.14, "learning_rate": 0.0008714492055849783, "loss": 0.0879, "theoretical_loss": 3.9855428750137754, "tokens_seen": 452984832 }, { "epoch": 0.14, "learning_rate": 0.000871368961643396, "loss": 0.0884, "theoretical_loss": 3.9852921054858665, "tokens_seen": 453246976 }, { "epoch": 0.14, "learning_rate": 0.0008712887177018135, "loss": 0.0839, "theoretical_loss": 3.9850415215370445, "tokens_seen": 453509120 }, { "epoch": 0.14, "learning_rate": 0.0008712084737602312, "loss": 0.0814, "theoretical_loss": 3.984791122922821, "tokens_seen": 453771264 }, { "epoch": 0.14, "learning_rate": 0.0008711282298186487, "loss": 0.0855, "theoretical_loss": 3.98454090939917, "tokens_seen": 454033408 }, { "epoch": 0.14, "learning_rate": 0.0008710479858770663, "loss": 0.0857, "theoretical_loss": 3.984290880722531, "tokens_seen": 454295552 }, { "epoch": 0.14, "learning_rate": 0.0008709677419354839, "loss": 0.0831, "theoretical_loss": 3.9840410366498, "tokens_seen": 454557696 }, { "epoch": 0.14, "learning_rate": 0.0008708874979939014, "loss": 0.0864, "theoretical_loss": 3.983791376938336, "tokens_seen": 454819840 }, { "epoch": 0.14, "learning_rate": 0.0008708072540523191, "loss": 0.0828, "theoretical_loss": 3.983541901345955, "tokens_seen": 455081984 }, { "epoch": 0.14, "learning_rate": 0.0008707270101107366, "loss": 0.0868, "theoretical_loss": 3.983292609630931, "tokens_seen": 455344128 }, { "epoch": 0.14, "learning_rate": 0.0008706467661691543, "loss": 0.0872, "theoretical_loss": 3.9830435015519936, "tokens_seen": 455606272 }, { "epoch": 0.14, "learning_rate": 0.0008705665222275719, "loss": 0.0855, "theoretical_loss": 3.982794576868328, "tokens_seen": 455868416 }, { "epoch": 0.14, "learning_rate": 0.0008704862782859895, "loss": 0.0842, "theoretical_loss": 3.982545835339573, "tokens_seen": 456130560 }, { "epoch": 0.14, "learning_rate": 0.000870406034344407, "loss": 0.0845, "theoretical_loss": 3.982297276725822, "tokens_seen": 456392704 }, { "epoch": 0.14, "learning_rate": 0.0008703257904028246, "loss": 0.0834, "theoretical_loss": 3.9820489007876176, "tokens_seen": 456654848 }, { "epoch": 0.14, "learning_rate": 0.0008702455464612422, "loss": 0.0808, "theoretical_loss": 3.981800707285955, "tokens_seen": 456916992 }, { "epoch": 0.14, "learning_rate": 0.0008701653025196597, "loss": 0.085, "theoretical_loss": 3.9815526959822787, "tokens_seen": 457179136 }, { "epoch": 0.14, "learning_rate": 0.0008700850585780774, "loss": 0.0831, "theoretical_loss": 3.981304866638481, "tokens_seen": 457441280 }, { "epoch": 0.14, "learning_rate": 0.0008700048146364949, "loss": 0.087, "theoretical_loss": 3.9810572190169027, "tokens_seen": 457703424 }, { "epoch": 0.14, "learning_rate": 0.0008699245706949126, "loss": 0.0862, "theoretical_loss": 3.9808097528803295, "tokens_seen": 457965568 }, { "epoch": 0.14, "learning_rate": 0.0008698443267533302, "loss": 0.0858, "theoretical_loss": 3.9805624679919935, "tokens_seen": 458227712 }, { "epoch": 0.14, "learning_rate": 0.0008697640828117477, "loss": 0.0873, "theoretical_loss": 3.98031536411557, "tokens_seen": 458489856 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0019264193251729012, "objective/train/docs_used": 173009, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6288082599639893, "objective/train/original_loss": 1.6288082599639893, "objective/train/theoretical_loss": 3.9800684410151783, "objective/train/tokens_used": 479212000, "objective/train/value_avg": -0.007114410400390625, "objective/train/value_loss": 0.0001740693987812847, "objective/train/value_max": -0.0002002716064453125, "objective/train/value_min": -0.42919921875, "objective/train/value_reward_corr": 0.5126065077385661, "objective/train/value_std": 0.00885772705078125, "objective/train/weight_avg": 1.0020055770874023, "objective/train/weighted_lm_loss": 1.6328537464141846, "objective/train/weights_max": 1.4530192613601685, "objective/train/weights_min": 0.3697379231452942, "theoretical_loss": 3.9800684410151783, "tokens_seen": 458752000 }, { "epoch": 0.14, "learning_rate": 0.0008696838388701653, "loss": 0.0843, "theoretical_loss": 3.9800684410151783, "tokens_seen": 458752000 }, { "epoch": 0.14, "learning_rate": 0.0008696035949285829, "loss": 0.082, "theoretical_loss": 3.979821698455379, "tokens_seen": 459014144 }, { "epoch": 0.14, "learning_rate": 0.0008695233509870005, "loss": 0.0855, "theoretical_loss": 3.9795751362011735, "tokens_seen": 459276288 }, { "epoch": 0.14, "learning_rate": 0.0008694431070454181, "loss": 0.0845, "theoretical_loss": 3.979328754018004, "tokens_seen": 459538432 }, { "epoch": 0.14, "learning_rate": 0.0008693628631038357, "loss": 0.0844, "theoretical_loss": 3.979082551671749, "tokens_seen": 459800576 }, { "epoch": 0.14, "learning_rate": 0.0008692826191622532, "loss": 0.0871, "theoretical_loss": 3.9788365289287286, "tokens_seen": 460062720 }, { "epoch": 0.14, "learning_rate": 0.0008692023752206708, "loss": 0.0873, "theoretical_loss": 3.9785906855556945, "tokens_seen": 460324864 }, { "epoch": 0.14, "learning_rate": 0.0008691221312790885, "loss": 0.0841, "theoretical_loss": 3.9783450213198384, "tokens_seen": 460587008 }, { "epoch": 0.14, "learning_rate": 0.000869041887337506, "loss": 0.0832, "theoretical_loss": 3.9780995359887843, "tokens_seen": 460849152 }, { "epoch": 0.14, "learning_rate": 0.0008689616433959237, "loss": 0.0863, "theoretical_loss": 3.9778542293305894, "tokens_seen": 461111296 }, { "epoch": 0.14, "learning_rate": 0.0008688813994543412, "loss": 0.086, "theoretical_loss": 3.977609101113744, "tokens_seen": 461373440 }, { "epoch": 0.14, "learning_rate": 0.0008688011555127588, "loss": 0.0805, "theoretical_loss": 3.97736415110717, "tokens_seen": 461635584 }, { "epoch": 0.14, "learning_rate": 0.0008687209115711764, "loss": 0.0899, "theoretical_loss": 3.977119379080218, "tokens_seen": 461897728 }, { "epoch": 0.14, "learning_rate": 0.000868640667629594, "loss": 0.088, "theoretical_loss": 3.9768747848026695, "tokens_seen": 462159872 }, { "epoch": 0.14, "learning_rate": 0.0008685604236880115, "loss": 0.0832, "theoretical_loss": 3.9766303680447335, "tokens_seen": 462422016 }, { "epoch": 0.14, "learning_rate": 0.0008684801797464291, "loss": 0.083, "theoretical_loss": 3.9763861285770457, "tokens_seen": 462684160 }, { "epoch": 0.14, "learning_rate": 0.0008683999358048468, "loss": 0.0856, "theoretical_loss": 3.9761420661706683, "tokens_seen": 462946304 }, { "epoch": 0.14, "learning_rate": 0.0008683196918632643, "loss": 0.0836, "theoretical_loss": 3.975898180597089, "tokens_seen": 463208448 }, { "epoch": 0.14, "learning_rate": 0.000868239447921682, "loss": 0.0852, "theoretical_loss": 3.9756544716282187, "tokens_seen": 463470592 }, { "epoch": 0.14, "learning_rate": 0.0008681592039800995, "loss": 0.0834, "theoretical_loss": 3.975410939036392, "tokens_seen": 463732736 }, { "epoch": 0.14, "learning_rate": 0.0008680789600385172, "loss": 0.0849, "theoretical_loss": 3.9751675825943638, "tokens_seen": 463994880 }, { "epoch": 0.14, "learning_rate": 0.0008679987160969347, "loss": 0.0891, "theoretical_loss": 3.974924402075313, "tokens_seen": 464257024 }, { "epoch": 0.14, "learning_rate": 0.0008679184721553522, "loss": 0.0858, "theoretical_loss": 3.9746813972528354, "tokens_seen": 464519168 }, { "epoch": 0.14, "learning_rate": 0.0008678382282137699, "loss": 0.0871, "theoretical_loss": 3.9744385679009486, "tokens_seen": 464781312 }, { "epoch": 0.14, "learning_rate": 0.0008677579842721874, "loss": 0.0829, "theoretical_loss": 3.9741959137940848, "tokens_seen": 465043456 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0013637046795338392, "objective/train/docs_used": 175471, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.55617356300354, "objective/train/original_loss": 1.55617356300354, "objective/train/theoretical_loss": 3.973953434707096, "objective/train/tokens_used": 485765600, "objective/train/value_avg": -0.00836944580078125, "objective/train/value_loss": 0.00027403823332861066, "objective/train/value_max": -0.00031256675720214844, "objective/train/value_min": -0.7216796875, "objective/train/value_reward_corr": 0.5759118889260557, "objective/train/value_std": 0.01276397705078125, "objective/train/weight_avg": 1.0014877319335938, "objective/train/weighted_lm_loss": 1.5572477579116821, "objective/train/weights_max": 1.3698445558547974, "objective/train/weights_min": 0.3708581030368805, "theoretical_loss": 3.973953434707096, "tokens_seen": 465305600 }, { "epoch": 0.14, "learning_rate": 0.0008676777403306051, "loss": 0.0824, "theoretical_loss": 3.973953434707096, "tokens_seen": 465305600 }, { "epoch": 0.14, "learning_rate": 0.0008675974963890227, "loss": 0.0837, "theoretical_loss": 3.973711130415248, "tokens_seen": 465567744 }, { "epoch": 0.14, "learning_rate": 0.0008675172524474403, "loss": 0.0852, "theoretical_loss": 3.973469000694223, "tokens_seen": 465829888 }, { "epoch": 0.14, "learning_rate": 0.0008674370085058578, "loss": 0.0837, "theoretical_loss": 3.973227045320117, "tokens_seen": 466092032 }, { "epoch": 0.14, "learning_rate": 0.0008673567645642754, "loss": 0.085, "theoretical_loss": 3.9729852640694383, "tokens_seen": 466354176 }, { "epoch": 0.14, "learning_rate": 0.000867276520622693, "loss": 0.0853, "theoretical_loss": 3.972743656719107, "tokens_seen": 466616320 }, { "epoch": 0.14, "learning_rate": 0.0008671962766811105, "loss": 0.0815, "theoretical_loss": 3.9725022230464537, "tokens_seen": 466878464 }, { "epoch": 0.14, "learning_rate": 0.0008671160327395282, "loss": 0.0844, "theoretical_loss": 3.9722609628292207, "tokens_seen": 467140608 }, { "epoch": 0.14, "learning_rate": 0.0008670357887979457, "loss": 0.0838, "theoretical_loss": 3.9720198758455574, "tokens_seen": 467402752 }, { "epoch": 0.14, "learning_rate": 0.0008669555448563635, "loss": 0.085, "theoretical_loss": 3.9717789618740227, "tokens_seen": 467664896 }, { "epoch": 0.14, "learning_rate": 0.000866875300914781, "loss": 0.0861, "theoretical_loss": 3.9715382206935814, "tokens_seen": 467927040 }, { "epoch": 0.14, "learning_rate": 0.0008667950569731985, "loss": 0.0818, "theoretical_loss": 3.9712976520836043, "tokens_seen": 468189184 }, { "epoch": 0.14, "learning_rate": 0.0008667148130316162, "loss": 0.0838, "theoretical_loss": 3.971057255823868, "tokens_seen": 468451328 }, { "epoch": 0.14, "learning_rate": 0.0008666345690900337, "loss": 0.0856, "theoretical_loss": 3.9708170316945526, "tokens_seen": 468713472 }, { "epoch": 0.14, "learning_rate": 0.0008665543251484513, "loss": 0.0849, "theoretical_loss": 3.9705769794762418, "tokens_seen": 468975616 }, { "epoch": 0.14, "learning_rate": 0.0008664740812068689, "loss": 0.0862, "theoretical_loss": 3.97033709894992, "tokens_seen": 469237760 }, { "epoch": 0.14, "learning_rate": 0.0008663938372652865, "loss": 0.0846, "theoretical_loss": 3.970097389896975, "tokens_seen": 469499904 }, { "epoch": 0.14, "learning_rate": 0.000866313593323704, "loss": 0.0851, "theoretical_loss": 3.9698578520991936, "tokens_seen": 469762048 }, { "epoch": 0.14, "learning_rate": 0.0008662333493821216, "loss": 0.087, "theoretical_loss": 3.9696184853387617, "tokens_seen": 470024192 }, { "epoch": 0.14, "learning_rate": 0.0008661531054405393, "loss": 0.0832, "theoretical_loss": 3.9693792893982636, "tokens_seen": 470286336 }, { "epoch": 0.14, "learning_rate": 0.0008660728614989568, "loss": 0.0855, "theoretical_loss": 3.969140264060681, "tokens_seen": 470548480 }, { "epoch": 0.14, "learning_rate": 0.0008659926175573745, "loss": 0.0841, "theoretical_loss": 3.9689014091093933, "tokens_seen": 470810624 }, { "epoch": 0.14, "learning_rate": 0.000865912373615792, "loss": 0.0841, "theoretical_loss": 3.9686627243281727, "tokens_seen": 471072768 }, { "epoch": 0.14, "learning_rate": 0.0008658321296742096, "loss": 0.0876, "theoretical_loss": 3.9684242095011886, "tokens_seen": 471334912 }, { "epoch": 0.14, "learning_rate": 0.0008657518857326272, "loss": 0.0839, "theoretical_loss": 3.9681858644130017, "tokens_seen": 471597056 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0014818408526480198, "objective/train/docs_used": 177948, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7183817625045776, "objective/train/original_loss": 1.718381643295288, "objective/train/theoretical_loss": 3.967947688848568, "objective/train/tokens_used": 492319200, "objective/train/value_avg": -0.01105499267578125, "objective/train/value_loss": 0.00045233668060973287, "objective/train/value_max": -0.00037550926208496094, "objective/train/value_min": -0.8310546875, "objective/train/value_reward_corr": 0.7132991610895623, "objective/train/value_std": 0.0223541259765625, "objective/train/weight_avg": 1.001685619354248, "objective/train/weighted_lm_loss": 1.7202060222625732, "objective/train/weights_max": 1.5177611112594604, "objective/train/weights_min": 0.37088075280189514, "theoretical_loss": 3.967947688848568, "tokens_seen": 471859200 }, { "epoch": 0.14, "learning_rate": 0.0008656716417910447, "loss": 0.0866, "theoretical_loss": 3.967947688848568, "tokens_seen": 471859200 }, { "epoch": 0.14, "learning_rate": 0.0008655913978494624, "loss": 0.0803, "theoretical_loss": 3.9677096825932328, "tokens_seen": 472121344 }, { "epoch": 0.14, "learning_rate": 0.0008655111539078799, "loss": 0.0837, "theoretical_loss": 3.9674718454327325, "tokens_seen": 472383488 }, { "epoch": 0.14, "learning_rate": 0.0008654309099662976, "loss": 0.0858, "theoretical_loss": 3.9672341771531956, "tokens_seen": 472645632 }, { "epoch": 0.14, "learning_rate": 0.0008653506660247152, "loss": 0.0856, "theoretical_loss": 3.9669966775411365, "tokens_seen": 472907776 }, { "epoch": 0.14, "learning_rate": 0.0008652704220831328, "loss": 0.0837, "theoretical_loss": 3.96675934638346, "tokens_seen": 473169920 }, { "epoch": 0.14, "learning_rate": 0.0008651901781415503, "loss": 0.0859, "theoretical_loss": 3.9665221834674558, "tokens_seen": 473432064 }, { "epoch": 0.14, "learning_rate": 0.000865109934199968, "loss": 0.0837, "theoretical_loss": 3.9662851885808026, "tokens_seen": 473694208 }, { "epoch": 0.14, "learning_rate": 0.0008650296902583855, "loss": 0.0843, "theoretical_loss": 3.966048361511562, "tokens_seen": 473956352 }, { "epoch": 0.14, "learning_rate": 0.000864949446316803, "loss": 0.0825, "theoretical_loss": 3.96581170204818, "tokens_seen": 474218496 }, { "epoch": 0.14, "learning_rate": 0.0008648692023752207, "loss": 0.0867, "theoretical_loss": 3.9655752099794874, "tokens_seen": 474480640 }, { "epoch": 0.14, "learning_rate": 0.0008647889584336382, "loss": 0.0885, "theoretical_loss": 3.9653388850946976, "tokens_seen": 474742784 }, { "epoch": 0.14, "learning_rate": 0.0008647087144920559, "loss": 0.0836, "theoretical_loss": 3.965102727183404, "tokens_seen": 475004928 }, { "epoch": 0.14, "learning_rate": 0.0008646284705504735, "loss": 0.0829, "theoretical_loss": 3.9648667360355816, "tokens_seen": 475267072 }, { "epoch": 0.14, "learning_rate": 0.0008645482266088911, "loss": 0.0863, "theoretical_loss": 3.9646309114415863, "tokens_seen": 475529216 }, { "epoch": 0.14, "learning_rate": 0.0008644679826673087, "loss": 0.0834, "theoretical_loss": 3.9643952531921505, "tokens_seen": 475791360 }, { "epoch": 0.14, "learning_rate": 0.0008643877387257262, "loss": 0.0814, "theoretical_loss": 3.9641597610783874, "tokens_seen": 476053504 }, { "epoch": 0.14, "learning_rate": 0.0008643074947841438, "loss": 0.0845, "theoretical_loss": 3.9639244348917853, "tokens_seen": 476315648 }, { "epoch": 0.14, "learning_rate": 0.0008642272508425614, "loss": 0.0821, "theoretical_loss": 3.96368927442421, "tokens_seen": 476577792 }, { "epoch": 0.14, "learning_rate": 0.000864147006900979, "loss": 0.0851, "theoretical_loss": 3.9634542794679013, "tokens_seen": 476839936 }, { "epoch": 0.14, "learning_rate": 0.0008640667629593965, "loss": 0.0828, "theoretical_loss": 3.963219449815475, "tokens_seen": 477102080 }, { "epoch": 0.14, "learning_rate": 0.0008639865190178143, "loss": 0.0805, "theoretical_loss": 3.9629847852599207, "tokens_seen": 477364224 }, { "epoch": 0.14, "learning_rate": 0.0008639062750762318, "loss": 0.0868, "theoretical_loss": 3.9627502855945984, "tokens_seen": 477626368 }, { "epoch": 0.14, "learning_rate": 0.0008638260311346493, "loss": 0.0836, "theoretical_loss": 3.962515950613242, "tokens_seen": 477888512 }, { "epoch": 0.14, "learning_rate": 0.000863745787193067, "loss": 0.0858, "theoretical_loss": 3.962281780109957, "tokens_seen": 478150656 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0006618535844609141, "objective/train/docs_used": 180329, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7160143852233887, "objective/train/original_loss": 1.7160143852233887, "objective/train/theoretical_loss": 3.9620477738792164, "objective/train/tokens_used": 498872800, "objective/train/value_avg": -0.006938934326171875, "objective/train/value_loss": 0.0002311340213054791, "objective/train/value_max": -0.00033926963806152344, "objective/train/value_min": -0.716796875, "objective/train/value_reward_corr": 0.5828097197150116, "objective/train/value_std": 0.010406494140625, "objective/train/weight_avg": 1.0007702112197876, "objective/train/weighted_lm_loss": 1.7172032594680786, "objective/train/weights_max": 1.2420917749404907, "objective/train/weights_min": 0.37445467710494995, "theoretical_loss": 3.9620477738792164, "tokens_seen": 478412800 }, { "epoch": 0.14, "learning_rate": 0.0008636655432514845, "loss": 0.0838, "theoretical_loss": 3.9620477738792164, "tokens_seen": 478412800 }, { "epoch": 0.15, "learning_rate": 0.0008635852993099021, "loss": 0.085, "theoretical_loss": 3.9618139317158647, "tokens_seen": 478674944 }, { "epoch": 0.15, "learning_rate": 0.0008635050553683197, "loss": 0.084, "theoretical_loss": 3.961580253415114, "tokens_seen": 478937088 }, { "epoch": 0.15, "learning_rate": 0.0008634248114267373, "loss": 0.084, "theoretical_loss": 3.9613467387725434, "tokens_seen": 479199232 }, { "epoch": 0.15, "learning_rate": 0.0008633445674851548, "loss": 0.0804, "theoretical_loss": 3.9611133875841, "tokens_seen": 479461376 }, { "epoch": 0.15, "learning_rate": 0.0008632643235435724, "loss": 0.085, "theoretical_loss": 3.960880199646096, "tokens_seen": 479723520 }, { "epoch": 0.15, "learning_rate": 0.0008631840796019901, "loss": 0.0857, "theoretical_loss": 3.9606471747552083, "tokens_seen": 479985664 }, { "epoch": 0.15, "learning_rate": 0.0008631038356604077, "loss": 0.0822, "theoretical_loss": 3.9604143127084774, "tokens_seen": 480247808 }, { "epoch": 0.15, "learning_rate": 0.0008630235917188253, "loss": 0.0845, "theoretical_loss": 3.960181613303309, "tokens_seen": 480509952 }, { "epoch": 0.15, "learning_rate": 0.0008629433477772428, "loss": 0.0871, "theoretical_loss": 3.9599490763374687, "tokens_seen": 480772096 }, { "epoch": 0.15, "learning_rate": 0.0008628631038356605, "loss": 0.0853, "theoretical_loss": 3.9597167016090866, "tokens_seen": 481034240 }, { "epoch": 0.15, "learning_rate": 0.000862782859894078, "loss": 0.0825, "theoretical_loss": 3.95948448891665, "tokens_seen": 481296384 }, { "epoch": 0.15, "learning_rate": 0.0008627026159524955, "loss": 0.0835, "theoretical_loss": 3.959252438059009, "tokens_seen": 481558528 }, { "epoch": 0.15, "learning_rate": 0.0008626223720109132, "loss": 0.0839, "theoretical_loss": 3.9590205488353707, "tokens_seen": 481820672 }, { "epoch": 0.15, "learning_rate": 0.0008625421280693307, "loss": 0.0864, "theoretical_loss": 3.958788821045302, "tokens_seen": 482082816 }, { "epoch": 0.15, "learning_rate": 0.0008624618841277484, "loss": 0.0845, "theoretical_loss": 3.958557254488727, "tokens_seen": 482344960 }, { "epoch": 0.15, "learning_rate": 0.000862381640186166, "loss": 0.0836, "theoretical_loss": 3.958325848965925, "tokens_seen": 482607104 }, { "epoch": 0.15, "learning_rate": 0.0008623013962445836, "loss": 0.0829, "theoretical_loss": 3.958094604277532, "tokens_seen": 482869248 }, { "epoch": 0.15, "learning_rate": 0.0008622211523030011, "loss": 0.086, "theoretical_loss": 3.9578635202245387, "tokens_seen": 483131392 }, { "epoch": 0.15, "learning_rate": 0.0008621409083614188, "loss": 0.0803, "theoretical_loss": 3.95763259660829, "tokens_seen": 483393536 }, { "epoch": 0.15, "learning_rate": 0.0008620606644198363, "loss": 0.0825, "theoretical_loss": 3.957401833230484, "tokens_seen": 483655680 }, { "epoch": 0.15, "learning_rate": 0.0008619804204782538, "loss": 0.0829, "theoretical_loss": 3.957171229893171, "tokens_seen": 483917824 }, { "epoch": 0.15, "learning_rate": 0.0008619001765366715, "loss": 0.0842, "theoretical_loss": 3.956940786398753, "tokens_seen": 484179968 }, { "epoch": 0.15, "learning_rate": 0.000861819932595089, "loss": 0.0852, "theoretical_loss": 3.9567105025499827, "tokens_seen": 484442112 }, { "epoch": 0.15, "learning_rate": 0.0008617396886535068, "loss": 0.0835, "theoretical_loss": 3.9564803781499633, "tokens_seen": 484704256 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0013643309939652681, "objective/train/docs_used": 182723, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5896873474121094, "objective/train/original_loss": 1.5896873474121094, "objective/train/theoretical_loss": 3.956250413002146, "objective/train/tokens_used": 505426400, "objective/train/value_avg": -0.00983428955078125, "objective/train/value_loss": 0.00046001793816685677, "objective/train/value_max": -0.0002512931823730469, "objective/train/value_min": -0.94873046875, "objective/train/value_reward_corr": 0.692742976797035, "objective/train/value_std": 0.0214996337890625, "objective/train/weight_avg": 1.0015811920166016, "objective/train/weighted_lm_loss": 1.592099905014038, "objective/train/weights_max": 2.4389986991882324, "objective/train/weights_min": 0.37612104415893555, "theoretical_loss": 3.956250413002146, "tokens_seen": 484966400 }, { "epoch": 0.15, "learning_rate": 0.0008616594447119243, "loss": 0.084, "theoretical_loss": 3.956250413002146, "tokens_seen": 484966400 }, { "epoch": 0.15, "learning_rate": 0.0008615792007703419, "loss": 0.0847, "theoretical_loss": 3.9560206069103314, "tokens_seen": 485228544 }, { "epoch": 0.15, "learning_rate": 0.0008614989568287595, "loss": 0.0855, "theoretical_loss": 3.9557909596786676, "tokens_seen": 485490688 }, { "epoch": 0.15, "learning_rate": 0.000861418712887177, "loss": 0.0836, "theoretical_loss": 3.9555614711116487, "tokens_seen": 485752832 }, { "epoch": 0.15, "learning_rate": 0.0008613384689455946, "loss": 0.0837, "theoretical_loss": 3.9553321410141162, "tokens_seen": 486014976 }, { "epoch": 0.15, "learning_rate": 0.0008612582250040122, "loss": 0.0828, "theoretical_loss": 3.9551029691912545, "tokens_seen": 486277120 }, { "epoch": 0.15, "learning_rate": 0.0008611779810624298, "loss": 0.0839, "theoretical_loss": 3.954873955448594, "tokens_seen": 486539264 }, { "epoch": 0.15, "learning_rate": 0.0008610977371208473, "loss": 0.084, "theoretical_loss": 3.9546450995920086, "tokens_seen": 486801408 }, { "epoch": 0.15, "learning_rate": 0.0008610174931792651, "loss": 0.0823, "theoretical_loss": 3.954416401427715, "tokens_seen": 487063552 }, { "epoch": 0.15, "learning_rate": 0.0008609372492376826, "loss": 0.0821, "theoretical_loss": 3.9541878607622705, "tokens_seen": 487325696 }, { "epoch": 0.15, "learning_rate": 0.0008608570052961001, "loss": 0.0802, "theoretical_loss": 3.9539594774025755, "tokens_seen": 487587840 }, { "epoch": 0.15, "learning_rate": 0.0008607767613545178, "loss": 0.0833, "theoretical_loss": 3.9537312511558698, "tokens_seen": 487849984 }, { "epoch": 0.15, "learning_rate": 0.0008606965174129353, "loss": 0.0821, "theoretical_loss": 3.953503181829732, "tokens_seen": 488112128 }, { "epoch": 0.15, "learning_rate": 0.000860616273471353, "loss": 0.0838, "theoretical_loss": 3.9532752692320816, "tokens_seen": 488374272 }, { "epoch": 0.15, "learning_rate": 0.0008605360295297705, "loss": 0.0807, "theoretical_loss": 3.9530475131711746, "tokens_seen": 488636416 }, { "epoch": 0.15, "learning_rate": 0.0008604557855881881, "loss": 0.0823, "theoretical_loss": 3.9528199134556044, "tokens_seen": 488898560 }, { "epoch": 0.15, "learning_rate": 0.0008603755416466057, "loss": 0.0838, "theoretical_loss": 3.9525924698943022, "tokens_seen": 489160704 }, { "epoch": 0.15, "learning_rate": 0.0008602952977050232, "loss": 0.0829, "theoretical_loss": 3.952365182296533, "tokens_seen": 489422848 }, { "epoch": 0.15, "learning_rate": 0.0008602150537634409, "loss": 0.0842, "theoretical_loss": 3.9521380504718975, "tokens_seen": 489684992 }, { "epoch": 0.15, "learning_rate": 0.0008601348098218585, "loss": 0.0849, "theoretical_loss": 3.9519110742303325, "tokens_seen": 489947136 }, { "epoch": 0.15, "learning_rate": 0.0008600545658802761, "loss": 0.0829, "theoretical_loss": 3.951684253382105, "tokens_seen": 490209280 }, { "epoch": 0.15, "learning_rate": 0.0008599743219386936, "loss": 0.0827, "theoretical_loss": 3.951457587737817, "tokens_seen": 490471424 }, { "epoch": 0.15, "learning_rate": 0.0008598940779971113, "loss": 0.0829, "theoretical_loss": 3.9512310771084014, "tokens_seen": 490733568 }, { "epoch": 0.15, "learning_rate": 0.0008598138340555288, "loss": 0.0846, "theoretical_loss": 3.951004721305123, "tokens_seen": 490995712 }, { "epoch": 0.15, "learning_rate": 0.0008597335901139463, "loss": 0.0828, "theoretical_loss": 3.950778520139576, "tokens_seen": 491257856 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0006203674129210413, "objective/train/docs_used": 185162, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6717313528060913, "objective/train/original_loss": 1.6717313528060913, "objective/train/theoretical_loss": 3.9505524734236848, "objective/train/tokens_used": 511980000, "objective/train/value_avg": -0.00823974609375, "objective/train/value_loss": 0.00022850836103316396, "objective/train/value_max": -0.0002758502960205078, "objective/train/value_min": -0.67724609375, "objective/train/value_reward_corr": 0.6322221181145709, "objective/train/value_std": 0.0125732421875, "objective/train/weight_avg": 1.0007286071777344, "objective/train/weighted_lm_loss": 1.672489881515503, "objective/train/weights_max": 1.3001129627227783, "objective/train/weights_min": 0.372374951839447, "theoretical_loss": 3.9505524734236848, "tokens_seen": 491520000 }, { "epoch": 0.15, "learning_rate": 0.000859653346172364, "loss": 0.0839, "theoretical_loss": 3.9505524734236848, "tokens_seen": 491520000 }, { "epoch": 0.15, "learning_rate": 0.0008595731022307815, "loss": 0.0811, "theoretical_loss": 3.950326580969703, "tokens_seen": 491782144 }, { "epoch": 0.15, "learning_rate": 0.0008594928582891992, "loss": 0.0851, "theoretical_loss": 3.950100842590212, "tokens_seen": 492044288 }, { "epoch": 0.15, "learning_rate": 0.0008594126143476168, "loss": 0.0847, "theoretical_loss": 3.949875258098121, "tokens_seen": 492306432 }, { "epoch": 0.15, "learning_rate": 0.0008593323704060344, "loss": 0.0849, "theoretical_loss": 3.949649827306665, "tokens_seen": 492568576 }, { "epoch": 0.15, "learning_rate": 0.000859252126464452, "loss": 0.084, "theoretical_loss": 3.9494245500294047, "tokens_seen": 492830720 }, { "epoch": 0.15, "learning_rate": 0.0008591718825228696, "loss": 0.0837, "theoretical_loss": 3.949199426080228, "tokens_seen": 493092864 }, { "epoch": 0.15, "learning_rate": 0.0008590916385812871, "loss": 0.0848, "theoretical_loss": 3.9489744552733455, "tokens_seen": 493355008 }, { "epoch": 0.15, "learning_rate": 0.0008590113946397047, "loss": 0.0887, "theoretical_loss": 3.9487496374232913, "tokens_seen": 493617152 }, { "epoch": 0.15, "learning_rate": 0.0008589311506981223, "loss": 0.0828, "theoretical_loss": 3.9485249723449236, "tokens_seen": 493879296 }, { "epoch": 0.15, "learning_rate": 0.0008588509067565398, "loss": 0.0822, "theoretical_loss": 3.9483004598534217, "tokens_seen": 494141440 }, { "epoch": 0.15, "learning_rate": 0.0008587706628149576, "loss": 0.0855, "theoretical_loss": 3.948076099764288, "tokens_seen": 494403584 }, { "epoch": 0.15, "learning_rate": 0.0008586904188733751, "loss": 0.089, "theoretical_loss": 3.947851891893343, "tokens_seen": 494665728 }, { "epoch": 0.15, "learning_rate": 0.0008586101749317927, "loss": 0.0803, "theoretical_loss": 3.9476278360567303, "tokens_seen": 494927872 }, { "epoch": 0.15, "learning_rate": 0.0008585299309902103, "loss": 0.0838, "theoretical_loss": 3.9474039320709107, "tokens_seen": 495190016 }, { "epoch": 0.15, "learning_rate": 0.0008584496870486278, "loss": 0.0867, "theoretical_loss": 3.9471801797526633, "tokens_seen": 495452160 }, { "epoch": 0.15, "learning_rate": 0.0008583694431070454, "loss": 0.0818, "theoretical_loss": 3.946956578919088, "tokens_seen": 495714304 }, { "epoch": 0.15, "learning_rate": 0.000858289199165463, "loss": 0.0841, "theoretical_loss": 3.9467331293875976, "tokens_seen": 495976448 }, { "epoch": 0.15, "learning_rate": 0.0008582089552238806, "loss": 0.0854, "theoretical_loss": 3.9465098309759252, "tokens_seen": 496238592 }, { "epoch": 0.15, "learning_rate": 0.0008581287112822982, "loss": 0.0843, "theoretical_loss": 3.9462866835021178, "tokens_seen": 496500736 }, { "epoch": 0.15, "learning_rate": 0.0008580484673407159, "loss": 0.0838, "theoretical_loss": 3.9460636867845365, "tokens_seen": 496762880 }, { "epoch": 0.15, "learning_rate": 0.0008579682233991334, "loss": 0.0851, "theoretical_loss": 3.9458408406418584, "tokens_seen": 497025024 }, { "epoch": 0.15, "learning_rate": 0.000857887979457551, "loss": 0.0823, "theoretical_loss": 3.945618144893073, "tokens_seen": 497287168 }, { "epoch": 0.15, "learning_rate": 0.0008578077355159686, "loss": 0.0865, "theoretical_loss": 3.9453955993574845, "tokens_seen": 497549312 }, { "epoch": 0.15, "learning_rate": 0.0008577274915743861, "loss": 0.083, "theoretical_loss": 3.945173203854707, "tokens_seen": 497811456 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.00040782641735859215, "objective/train/docs_used": 187487, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7300009727478027, "objective/train/original_loss": 1.7300009727478027, "objective/train/theoretical_loss": 3.9449509582046662, "objective/train/tokens_used": 518533600, "objective/train/value_avg": -0.00957489013671875, "objective/train/value_loss": 0.0005079260445199907, "objective/train/value_max": -0.00016736984252929688, "objective/train/value_min": -0.72998046875, "objective/train/value_reward_corr": 0.5804749067115648, "objective/train/value_std": 0.01552581787109375, "objective/train/weight_avg": 1.0006271600723267, "objective/train/weighted_lm_loss": 1.7296302318572998, "objective/train/weights_max": 1.716912031173706, "objective/train/weights_min": 0.36845633387565613, "theoretical_loss": 3.9449509582046662, "tokens_seen": 498073600 }, { "epoch": 0.15, "learning_rate": 0.0008576472476328038, "loss": 0.0856, "theoretical_loss": 3.9449509582046662, "tokens_seen": 498073600 }, { "epoch": 0.15, "learning_rate": 0.0008575670036912213, "loss": 0.0832, "theoretical_loss": 3.944728862227601, "tokens_seen": 498335744 }, { "epoch": 0.15, "learning_rate": 0.0008574867597496389, "loss": 0.0812, "theoretical_loss": 3.9445069157440575, "tokens_seen": 498597888 }, { "epoch": 0.15, "learning_rate": 0.0008574065158080565, "loss": 0.0846, "theoretical_loss": 3.944285118574893, "tokens_seen": 498860032 }, { "epoch": 0.15, "learning_rate": 0.000857326271866474, "loss": 0.0842, "theoretical_loss": 3.9440634705412725, "tokens_seen": 499122176 }, { "epoch": 0.15, "learning_rate": 0.0008572460279248917, "loss": 0.0851, "theoretical_loss": 3.9438419714646695, "tokens_seen": 499384320 }, { "epoch": 0.15, "learning_rate": 0.0008571657839833093, "loss": 0.0868, "theoretical_loss": 3.9436206211668647, "tokens_seen": 499646464 }, { "epoch": 0.15, "learning_rate": 0.0008570855400417269, "loss": 0.0806, "theoretical_loss": 3.9433994194699453, "tokens_seen": 499908608 }, { "epoch": 0.15, "learning_rate": 0.0008570052961001444, "loss": 0.0834, "theoretical_loss": 3.943178366196304, "tokens_seen": 500170752 }, { "epoch": 0.15, "learning_rate": 0.0008569250521585621, "loss": 0.0847, "theoretical_loss": 3.942957461168639, "tokens_seen": 500432896 }, { "epoch": 0.15, "learning_rate": 0.0008568448082169796, "loss": 0.0869, "theoretical_loss": 3.9427367042099544, "tokens_seen": 500695040 }, { "epoch": 0.15, "learning_rate": 0.0008567645642753972, "loss": 0.0808, "theoretical_loss": 3.942516095143555, "tokens_seen": 500957184 }, { "epoch": 0.15, "learning_rate": 0.0008566843203338148, "loss": 0.0846, "theoretical_loss": 3.9422956337930524, "tokens_seen": 501219328 }, { "epoch": 0.15, "learning_rate": 0.0008566040763922323, "loss": 0.0818, "theoretical_loss": 3.942075319982358, "tokens_seen": 501481472 }, { "epoch": 0.15, "learning_rate": 0.0008565238324506501, "loss": 0.0828, "theoretical_loss": 3.941855153535686, "tokens_seen": 501743616 }, { "epoch": 0.15, "learning_rate": 0.0008564435885090676, "loss": 0.0837, "theoretical_loss": 3.9416351342775524, "tokens_seen": 502005760 }, { "epoch": 0.15, "learning_rate": 0.0008563633445674852, "loss": 0.0825, "theoretical_loss": 3.9414152620327716, "tokens_seen": 502267904 }, { "epoch": 0.15, "learning_rate": 0.0008562831006259028, "loss": 0.0798, "theoretical_loss": 3.941195536626461, "tokens_seen": 502530048 }, { "epoch": 0.15, "learning_rate": 0.0008562028566843203, "loss": 0.0835, "theoretical_loss": 3.940975957884034, "tokens_seen": 502792192 }, { "epoch": 0.15, "learning_rate": 0.0008561226127427379, "loss": 0.0839, "theoretical_loss": 3.9407565256312047, "tokens_seen": 503054336 }, { "epoch": 0.15, "learning_rate": 0.0008560423688011555, "loss": 0.0859, "theoretical_loss": 3.940537239693983, "tokens_seen": 503316480 }, { "epoch": 0.15, "learning_rate": 0.0008559621248595731, "loss": 0.0844, "theoretical_loss": 3.9403180998986778, "tokens_seen": 503578624 }, { "epoch": 0.15, "learning_rate": 0.0008558818809179906, "loss": 0.0805, "theoretical_loss": 3.9400991060718935, "tokens_seen": 503840768 }, { "epoch": 0.15, "learning_rate": 0.0008558016369764084, "loss": 0.0865, "theoretical_loss": 3.93988025804053, "tokens_seen": 504102912 }, { "epoch": 0.15, "learning_rate": 0.0008557213930348259, "loss": 0.0846, "theoretical_loss": 3.9396615556317838, "tokens_seen": 504365056 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.003414377337321639, "objective/train/docs_used": 189911, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.635353684425354, "objective/train/original_loss": 1.6353535652160645, "objective/train/theoretical_loss": 3.939442998673144, "objective/train/tokens_used": 525087200, "objective/train/value_avg": -0.0120849609375, "objective/train/value_loss": 0.0004840447218157351, "objective/train/value_max": -0.0003199577331542969, "objective/train/value_min": -0.7880859375, "objective/train/value_reward_corr": 0.7853448439202004, "objective/train/value_std": 0.0305023193359375, "objective/train/weight_avg": 1.0036317110061646, "objective/train/weighted_lm_loss": 1.6402772665023804, "objective/train/weights_max": 1.392095923423767, "objective/train/weights_min": 0.3688413202762604, "theoretical_loss": 3.939442998673144, "tokens_seen": 504627200 }, { "epoch": 0.15, "learning_rate": 0.0008556411490932436, "loss": 0.0822, "theoretical_loss": 3.939442998673144, "tokens_seen": 504627200 }, { "epoch": 0.15, "learning_rate": 0.0008555609051516611, "loss": 0.0824, "theoretical_loss": 3.9392245869923954, "tokens_seen": 504889344 }, { "epoch": 0.15, "learning_rate": 0.0008554806612100786, "loss": 0.0865, "theoretical_loss": 3.939006320417614, "tokens_seen": 505151488 }, { "epoch": 0.15, "learning_rate": 0.0008554004172684963, "loss": 0.0833, "theoretical_loss": 3.9387881987771705, "tokens_seen": 505413632 }, { "epoch": 0.15, "learning_rate": 0.0008553201733269138, "loss": 0.0847, "theoretical_loss": 3.9385702218997247, "tokens_seen": 505675776 }, { "epoch": 0.15, "learning_rate": 0.0008552399293853314, "loss": 0.0849, "theoretical_loss": 3.9383523896142316, "tokens_seen": 505937920 }, { "epoch": 0.15, "learning_rate": 0.000855159685443749, "loss": 0.0849, "theoretical_loss": 3.9381347017499326, "tokens_seen": 506200064 }, { "epoch": 0.15, "learning_rate": 0.0008550794415021667, "loss": 0.0847, "theoretical_loss": 3.9379171581363623, "tokens_seen": 506462208 }, { "epoch": 0.15, "learning_rate": 0.0008549991975605842, "loss": 0.0846, "theoretical_loss": 3.937699758603342, "tokens_seen": 506724352 }, { "epoch": 0.15, "learning_rate": 0.0008549189536190018, "loss": 0.0845, "theoretical_loss": 3.937482502980985, "tokens_seen": 506986496 }, { "epoch": 0.15, "learning_rate": 0.0008548387096774194, "loss": 0.0873, "theoretical_loss": 3.937265391099688, "tokens_seen": 507248640 }, { "epoch": 0.15, "learning_rate": 0.0008547584657358369, "loss": 0.081, "theoretical_loss": 3.9370484227901397, "tokens_seen": 507510784 }, { "epoch": 0.15, "learning_rate": 0.0008546782217942546, "loss": 0.0815, "theoretical_loss": 3.9368315978833124, "tokens_seen": 507772928 }, { "epoch": 0.15, "learning_rate": 0.0008545979778526721, "loss": 0.0824, "theoretical_loss": 3.936614916210466, "tokens_seen": 508035072 }, { "epoch": 0.15, "learning_rate": 0.0008545177339110897, "loss": 0.0811, "theoretical_loss": 3.9363983776031457, "tokens_seen": 508297216 }, { "epoch": 0.15, "learning_rate": 0.0008544374899695073, "loss": 0.0846, "theoretical_loss": 3.936181981893182, "tokens_seen": 508559360 }, { "epoch": 0.15, "learning_rate": 0.0008543572460279248, "loss": 0.0867, "theoretical_loss": 3.9359657289126875, "tokens_seen": 508821504 }, { "epoch": 0.15, "learning_rate": 0.0008542770020863426, "loss": 0.0841, "theoretical_loss": 3.935749618494061, "tokens_seen": 509083648 }, { "epoch": 0.15, "learning_rate": 0.0008541967581447601, "loss": 0.0836, "theoretical_loss": 3.935533650469983, "tokens_seen": 509345792 }, { "epoch": 0.15, "learning_rate": 0.0008541165142031777, "loss": 0.0818, "theoretical_loss": 3.935317824673417, "tokens_seen": 509607936 }, { "epoch": 0.15, "learning_rate": 0.0008540362702615953, "loss": 0.0842, "theoretical_loss": 3.935102140937608, "tokens_seen": 509870080 }, { "epoch": 0.15, "learning_rate": 0.0008539560263200129, "loss": 0.0829, "theoretical_loss": 3.934886599096081, "tokens_seen": 510132224 }, { "epoch": 0.15, "learning_rate": 0.0008538757823784304, "loss": 0.0819, "theoretical_loss": 3.9346711989826426, "tokens_seen": 510394368 }, { "epoch": 0.15, "learning_rate": 0.000853795538436848, "loss": 0.0832, "theoretical_loss": 3.93445594043138, "tokens_seen": 510656512 }, { "epoch": 0.15, "learning_rate": 0.0008537152944952656, "loss": 0.082, "theoretical_loss": 3.9342408232766584, "tokens_seen": 510918656 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0016618784284219146, "objective/train/docs_used": 192198, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7548766136169434, "objective/train/original_loss": 1.7548766136169434, "objective/train/theoretical_loss": 3.934025847353122, "objective/train/tokens_used": 531640800, "objective/train/value_avg": -0.00916290283203125, "objective/train/value_loss": 0.0003608835977502167, "objective/train/value_max": -0.0002694129943847656, "objective/train/value_min": -0.78173828125, "objective/train/value_reward_corr": 0.6032279595057155, "objective/train/value_std": 0.015838623046875, "objective/train/weight_avg": 1.0018247365951538, "objective/train/weighted_lm_loss": 1.7572846412658691, "objective/train/weights_max": 1.5906000137329102, "objective/train/weights_min": 0.3688792884349823, "theoretical_loss": 3.934025847353122, "tokens_seen": 511180800 }, { "epoch": 0.15, "learning_rate": 0.0008536350505536831, "loss": 0.0836, "theoretical_loss": 3.934025847353122, "tokens_seen": 511180800 }, { "epoch": 0.15, "learning_rate": 0.0008535548066121009, "loss": 0.0818, "theoretical_loss": 3.9338110124956924, "tokens_seen": 511442944 }, { "epoch": 0.16, "learning_rate": 0.0008534745626705184, "loss": 0.0835, "theoretical_loss": 3.9335963185395713, "tokens_seen": 511705088 }, { "epoch": 0.16, "learning_rate": 0.000853394318728936, "loss": 0.0811, "theoretical_loss": 3.933381765320233, "tokens_seen": 511967232 }, { "epoch": 0.16, "learning_rate": 0.0008533140747873536, "loss": 0.0812, "theoretical_loss": 3.933167352673432, "tokens_seen": 512229376 }, { "epoch": 0.16, "learning_rate": 0.0008532338308457711, "loss": 0.0834, "theoretical_loss": 3.9329530804351958, "tokens_seen": 512491520 }, { "epoch": 0.16, "learning_rate": 0.0008531535869041887, "loss": 0.0855, "theoretical_loss": 3.9327389484418287, "tokens_seen": 512753664 }, { "epoch": 0.16, "learning_rate": 0.0008530733429626063, "loss": 0.0804, "theoretical_loss": 3.9325249565299076, "tokens_seen": 513015808 }, { "epoch": 0.16, "learning_rate": 0.0008529930990210239, "loss": 0.0843, "theoretical_loss": 3.932311104536285, "tokens_seen": 513277952 }, { "epoch": 0.16, "learning_rate": 0.0008529128550794415, "loss": 0.0822, "theoretical_loss": 3.9320973922980844, "tokens_seen": 513540096 }, { "epoch": 0.16, "learning_rate": 0.0008528326111378592, "loss": 0.0844, "theoretical_loss": 3.931883819652705, "tokens_seen": 513802240 }, { "epoch": 0.16, "learning_rate": 0.0008527523671962767, "loss": 0.082, "theoretical_loss": 3.9316703864378155, "tokens_seen": 514064384 }, { "epoch": 0.16, "learning_rate": 0.0008526721232546944, "loss": 0.082, "theoretical_loss": 3.9314570924913568, "tokens_seen": 514326528 }, { "epoch": 0.16, "learning_rate": 0.0008525918793131119, "loss": 0.0834, "theoretical_loss": 3.9312439376515407, "tokens_seen": 514588672 }, { "epoch": 0.16, "learning_rate": 0.0008525116353715294, "loss": 0.083, "theoretical_loss": 3.9310309217568493, "tokens_seen": 514850816 }, { "epoch": 0.16, "learning_rate": 0.0008524313914299471, "loss": 0.0845, "theoretical_loss": 3.9308180446460343, "tokens_seen": 515112960 }, { "epoch": 0.16, "learning_rate": 0.0008523511474883646, "loss": 0.0838, "theoretical_loss": 3.9306053061581165, "tokens_seen": 515375104 }, { "epoch": 0.16, "learning_rate": 0.0008522709035467822, "loss": 0.0834, "theoretical_loss": 3.930392706132385, "tokens_seen": 515637248 }, { "epoch": 0.16, "learning_rate": 0.0008521906596051998, "loss": 0.0845, "theoretical_loss": 3.9301802444083966, "tokens_seen": 515899392 }, { "epoch": 0.16, "learning_rate": 0.0008521104156636175, "loss": 0.0846, "theoretical_loss": 3.929967920825977, "tokens_seen": 516161536 }, { "epoch": 0.16, "learning_rate": 0.000852030171722035, "loss": 0.0805, "theoretical_loss": 3.929755735225216, "tokens_seen": 516423680 }, { "epoch": 0.16, "learning_rate": 0.0008519499277804526, "loss": 0.0829, "theoretical_loss": 3.9295436874464715, "tokens_seen": 516685824 }, { "epoch": 0.16, "learning_rate": 0.0008518696838388702, "loss": 0.0836, "theoretical_loss": 3.929331777330366, "tokens_seen": 516947968 }, { "epoch": 0.16, "learning_rate": 0.0008517894398972877, "loss": 0.0798, "theoretical_loss": 3.9291200047177886, "tokens_seen": 517210112 }, { "epoch": 0.16, "learning_rate": 0.0008517091959557054, "loss": 0.0815, "theoretical_loss": 3.9289083694498905, "tokens_seen": 517472256 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.001091600046493113, "objective/train/docs_used": 194581, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6318016052246094, "objective/train/original_loss": 1.6318016052246094, "objective/train/theoretical_loss": 3.9286968713680883, "objective/train/tokens_used": 538194400, "objective/train/value_avg": -0.00958251953125, "objective/train/value_loss": 0.0004281644069124013, "objective/train/value_max": -0.0002453327178955078, "objective/train/value_min": -0.646484375, "objective/train/value_reward_corr": 0.7146024060573813, "objective/train/value_std": 0.0170135498046875, "objective/train/weight_avg": 1.0012816190719604, "objective/train/weighted_lm_loss": 1.6339389085769653, "objective/train/weights_max": 1.4248768091201782, "objective/train/weights_min": 0.36841943860054016, "theoretical_loss": 3.9286968713680883, "tokens_seen": 517734400 }, { "epoch": 0.16, "learning_rate": 0.0008516289520141229, "loss": 0.0804, "theoretical_loss": 3.9286968713680883, "tokens_seen": 517734400 }, { "epoch": 0.16, "learning_rate": 0.0008515487080725406, "loss": 0.0823, "theoretical_loss": 3.9284855103140615, "tokens_seen": 517996544 }, { "epoch": 0.16, "learning_rate": 0.0008514684641309581, "loss": 0.0822, "theoretical_loss": 3.9282742861297524, "tokens_seen": 518258688 }, { "epoch": 0.16, "learning_rate": 0.0008513882201893756, "loss": 0.0788, "theoretical_loss": 3.928063198657365, "tokens_seen": 518520832 }, { "epoch": 0.16, "learning_rate": 0.0008513079762477934, "loss": 0.0808, "theoretical_loss": 3.9278522477393656, "tokens_seen": 518782976 }, { "epoch": 0.16, "learning_rate": 0.0008512277323062109, "loss": 0.0824, "theoretical_loss": 3.9276414332184815, "tokens_seen": 519045120 }, { "epoch": 0.16, "learning_rate": 0.0008511474883646285, "loss": 0.0818, "theoretical_loss": 3.927430754937699, "tokens_seen": 519307264 }, { "epoch": 0.16, "learning_rate": 0.0008510672444230461, "loss": 0.0795, "theoretical_loss": 3.927220212740267, "tokens_seen": 519569408 }, { "epoch": 0.16, "learning_rate": 0.0008509870004814637, "loss": 0.0795, "theoretical_loss": 3.9270098064696906, "tokens_seen": 519831552 }, { "epoch": 0.16, "learning_rate": 0.0008509067565398812, "loss": 0.0813, "theoretical_loss": 3.9267995359697356, "tokens_seen": 520093696 }, { "epoch": 0.16, "learning_rate": 0.0008508265125982988, "loss": 0.082, "theoretical_loss": 3.926589401084426, "tokens_seen": 520355840 }, { "epoch": 0.16, "learning_rate": 0.0008507462686567164, "loss": 0.0803, "theoretical_loss": 3.9263794016580427, "tokens_seen": 520617984 }, { "epoch": 0.16, "learning_rate": 0.0008506660247151339, "loss": 0.0798, "theoretical_loss": 3.9261695375351238, "tokens_seen": 520880128 }, { "epoch": 0.16, "learning_rate": 0.0008505857807735517, "loss": 0.0839, "theoretical_loss": 3.9259598085604646, "tokens_seen": 521142272 }, { "epoch": 0.16, "learning_rate": 0.0008505055368319692, "loss": 0.0809, "theoretical_loss": 3.925750214579116, "tokens_seen": 521404416 }, { "epoch": 0.16, "learning_rate": 0.0008504252928903869, "loss": 0.0844, "theoretical_loss": 3.9255407554363835, "tokens_seen": 521666560 }, { "epoch": 0.16, "learning_rate": 0.0008503450489488044, "loss": 0.0812, "theoretical_loss": 3.92533143097783, "tokens_seen": 521928704 }, { "epoch": 0.16, "learning_rate": 0.0008502648050072219, "loss": 0.0827, "theoretical_loss": 3.9251222410492694, "tokens_seen": 522190848 }, { "epoch": 0.16, "learning_rate": 0.0008501845610656396, "loss": 0.0829, "theoretical_loss": 3.924913185496772, "tokens_seen": 522452992 }, { "epoch": 0.16, "learning_rate": 0.0008501043171240571, "loss": 0.0813, "theoretical_loss": 3.924704264166659, "tokens_seen": 522715136 }, { "epoch": 0.16, "learning_rate": 0.0008500240731824747, "loss": 0.0816, "theoretical_loss": 3.9244954769055074, "tokens_seen": 522977280 }, { "epoch": 0.16, "learning_rate": 0.0008499438292408923, "loss": 0.0843, "theoretical_loss": 3.924286823560144, "tokens_seen": 523239424 }, { "epoch": 0.16, "learning_rate": 0.00084986358529931, "loss": 0.0805, "theoretical_loss": 3.9240783039776472, "tokens_seen": 523501568 }, { "epoch": 0.16, "learning_rate": 0.0008497833413577275, "loss": 0.0825, "theoretical_loss": 3.9238699180053485, "tokens_seen": 523763712 }, { "epoch": 0.16, "learning_rate": 0.0008497030974161451, "loss": 0.0836, "theoretical_loss": 3.923661665490828, "tokens_seen": 524025856 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0016532277222722769, "objective/train/docs_used": 196964, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5641077756881714, "objective/train/original_loss": 1.5641076564788818, "objective/train/theoretical_loss": 3.9234535462819156, "objective/train/tokens_used": 544748000, "objective/train/value_avg": -0.00901031494140625, "objective/train/value_loss": 0.00023702048929408193, "objective/train/value_max": -0.0002434253692626953, "objective/train/value_min": -0.58154296875, "objective/train/value_reward_corr": 0.6424405895231002, "objective/train/value_std": 0.0155029296875, "objective/train/weight_avg": 1.0017648935317993, "objective/train/weighted_lm_loss": 1.566743016242981, "objective/train/weights_max": 1.7467089891433716, "objective/train/weights_min": 0.3871968686580658, "theoretical_loss": 3.9234535462819156, "tokens_seen": 524288000 }, { "epoch": 0.16, "learning_rate": 0.0008496228534745627, "loss": 0.0815, "theoretical_loss": 3.9234535462819156, "tokens_seen": 524288000 }, { "epoch": 0.16, "learning_rate": 0.0008495426095329802, "loss": 0.0821, "theoretical_loss": 3.923245560226693, "tokens_seen": 524550144 }, { "epoch": 0.16, "learning_rate": 0.0008494623655913979, "loss": 0.0791, "theoretical_loss": 3.9230377071734885, "tokens_seen": 524812288 }, { "epoch": 0.16, "learning_rate": 0.0008493821216498154, "loss": 0.0819, "theoretical_loss": 3.9228299869708794, "tokens_seen": 525074432 }, { "epoch": 0.16, "learning_rate": 0.000849301877708233, "loss": 0.0824, "theoretical_loss": 3.9226223994676923, "tokens_seen": 525336576 }, { "epoch": 0.16, "learning_rate": 0.0008492216337666506, "loss": 0.0852, "theoretical_loss": 3.9224149445129983, "tokens_seen": 525598720 }, { "epoch": 0.16, "learning_rate": 0.0008491413898250683, "loss": 0.0812, "theoretical_loss": 3.922207621956119, "tokens_seen": 525860864 }, { "epoch": 0.16, "learning_rate": 0.0008490611458834859, "loss": 0.0804, "theoretical_loss": 3.9220004316466186, "tokens_seen": 526123008 }, { "epoch": 0.16, "learning_rate": 0.0008489809019419034, "loss": 0.0838, "theoretical_loss": 3.9217933734343093, "tokens_seen": 526385152 }, { "epoch": 0.16, "learning_rate": 0.000848900658000321, "loss": 0.0825, "theoretical_loss": 3.9215864471692488, "tokens_seen": 526647296 }, { "epoch": 0.16, "learning_rate": 0.0008488204140587386, "loss": 0.0816, "theoretical_loss": 3.921379652701738, "tokens_seen": 526909440 }, { "epoch": 0.16, "learning_rate": 0.0008487401701171562, "loss": 0.0808, "theoretical_loss": 3.9211729898823235, "tokens_seen": 527171584 }, { "epoch": 0.16, "learning_rate": 0.0008486599261755737, "loss": 0.082, "theoretical_loss": 3.920966458561794, "tokens_seen": 527433728 }, { "epoch": 0.16, "learning_rate": 0.0008485796822339914, "loss": 0.0813, "theoretical_loss": 3.920760058591182, "tokens_seen": 527695872 }, { "epoch": 0.16, "learning_rate": 0.0008484994382924089, "loss": 0.0826, "theoretical_loss": 3.9205537898217644, "tokens_seen": 527958016 }, { "epoch": 0.16, "learning_rate": 0.0008484191943508264, "loss": 0.0817, "theoretical_loss": 3.920347652105058, "tokens_seen": 528220160 }, { "epoch": 0.16, "learning_rate": 0.0008483389504092442, "loss": 0.0826, "theoretical_loss": 3.920141645292821, "tokens_seen": 528482304 }, { "epoch": 0.16, "learning_rate": 0.0008482587064676617, "loss": 0.081, "theoretical_loss": 3.919935769237055, "tokens_seen": 528744448 }, { "epoch": 0.16, "learning_rate": 0.0008481784625260793, "loss": 0.0839, "theoretical_loss": 3.91973002379, "tokens_seen": 529006592 }, { "epoch": 0.16, "learning_rate": 0.0008480982185844969, "loss": 0.0818, "theoretical_loss": 3.919524408804137, "tokens_seen": 529268736 }, { "epoch": 0.16, "learning_rate": 0.0008480179746429145, "loss": 0.0827, "theoretical_loss": 3.9193189241321873, "tokens_seen": 529530880 }, { "epoch": 0.16, "learning_rate": 0.0008479377307013321, "loss": 0.0821, "theoretical_loss": 3.9191135696271098, "tokens_seen": 529793024 }, { "epoch": 0.16, "learning_rate": 0.0008478574867597496, "loss": 0.0851, "theoretical_loss": 3.9189083451421025, "tokens_seen": 530055168 }, { "epoch": 0.16, "learning_rate": 0.0008477772428181672, "loss": 0.0782, "theoretical_loss": 3.9187032505306023, "tokens_seen": 530317312 }, { "epoch": 0.16, "learning_rate": 0.0008476969988765848, "loss": 0.0809, "theoretical_loss": 3.918498285646282, "tokens_seen": 530579456 }, { "epoch": 0.16, "objective/train/advantage_avg": 7.436873420374468e-05, "objective/train/docs_used": 199409, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5998988151550293, "objective/train/original_loss": 1.5998989343643188, "objective/train/theoretical_loss": 3.9182934503430538, "objective/train/tokens_used": 551301600, "objective/train/value_avg": -0.00795745849609375, "objective/train/value_loss": 0.00032716241548769176, "objective/train/value_max": -0.00034332275390625, "objective/train/value_min": -0.72900390625, "objective/train/value_reward_corr": 0.5799935782849467, "objective/train/value_std": 0.01358795166015625, "objective/train/weight_avg": 1.0002270936965942, "objective/train/weighted_lm_loss": 1.5996286869049072, "objective/train/weights_max": 1.4670954942703247, "objective/train/weights_min": 0.38993605971336365, "theoretical_loss": 3.9182934503430538, "tokens_seen": 530841600 }, { "epoch": 0.16, "learning_rate": 0.0008476167549350025, "loss": 0.0815, "theoretical_loss": 3.9182934503430538, "tokens_seen": 530841600 }, { "epoch": 0.16, "learning_rate": 0.00084753651099342, "loss": 0.0792, "theoretical_loss": 3.918088744475064, "tokens_seen": 531103744 }, { "epoch": 0.16, "learning_rate": 0.0008474562670518377, "loss": 0.084, "theoretical_loss": 3.9178841678966956, "tokens_seen": 531365888 }, { "epoch": 0.16, "learning_rate": 0.0008473760231102552, "loss": 0.0841, "theoretical_loss": 3.9176797204625693, "tokens_seen": 531628032 }, { "epoch": 0.16, "learning_rate": 0.0008472957791686727, "loss": 0.083, "theoretical_loss": 3.917475402027537, "tokens_seen": 531890176 }, { "epoch": 0.16, "learning_rate": 0.0008472155352270904, "loss": 0.0796, "theoretical_loss": 3.917271212446689, "tokens_seen": 532152320 }, { "epoch": 0.16, "learning_rate": 0.0008471352912855079, "loss": 0.082, "theoretical_loss": 3.917067151575348, "tokens_seen": 532414464 }, { "epoch": 0.16, "learning_rate": 0.0008470550473439255, "loss": 0.0846, "theoretical_loss": 3.916863219269069, "tokens_seen": 532676608 }, { "epoch": 0.16, "learning_rate": 0.0008469748034023431, "loss": 0.0813, "theoretical_loss": 3.9166594153836427, "tokens_seen": 532938752 }, { "epoch": 0.16, "learning_rate": 0.0008468945594607608, "loss": 0.082, "theoretical_loss": 3.9164557397750897, "tokens_seen": 533200896 }, { "epoch": 0.16, "learning_rate": 0.0008468143155191783, "loss": 0.0815, "theoretical_loss": 3.916252192299665, "tokens_seen": 533463040 }, { "epoch": 0.16, "learning_rate": 0.0008467340715775959, "loss": 0.0816, "theoretical_loss": 3.9160487728138538, "tokens_seen": 533725184 }, { "epoch": 0.16, "learning_rate": 0.0008466538276360135, "loss": 0.0821, "theoretical_loss": 3.9158454811743733, "tokens_seen": 533987328 }, { "epoch": 0.16, "learning_rate": 0.0008465735836944311, "loss": 0.0847, "theoretical_loss": 3.915642317238171, "tokens_seen": 534249472 }, { "epoch": 0.16, "learning_rate": 0.0008464933397528487, "loss": 0.0836, "theoretical_loss": 3.915439280862423, "tokens_seen": 534511616 }, { "epoch": 0.16, "learning_rate": 0.0008464130958112662, "loss": 0.0811, "theoretical_loss": 3.915236371904539, "tokens_seen": 534773760 }, { "epoch": 0.16, "learning_rate": 0.0008463328518696839, "loss": 0.0822, "theoretical_loss": 3.915033590222153, "tokens_seen": 535035904 }, { "epoch": 0.16, "learning_rate": 0.0008462526079281014, "loss": 0.0824, "theoretical_loss": 3.914830935673132, "tokens_seen": 535298048 }, { "epoch": 0.16, "learning_rate": 0.0008461723639865191, "loss": 0.0793, "theoretical_loss": 3.914628408115569, "tokens_seen": 535560192 }, { "epoch": 0.16, "learning_rate": 0.0008460921200449367, "loss": 0.0813, "theoretical_loss": 3.9144260074077843, "tokens_seen": 535822336 }, { "epoch": 0.16, "learning_rate": 0.0008460118761033542, "loss": 0.084, "theoretical_loss": 3.9142237334083276, "tokens_seen": 536084480 }, { "epoch": 0.16, "learning_rate": 0.0008459316321617718, "loss": 0.0814, "theoretical_loss": 3.914021585975973, "tokens_seen": 536346624 }, { "epoch": 0.16, "learning_rate": 0.0008458513882201894, "loss": 0.0803, "theoretical_loss": 3.9138195649697227, "tokens_seen": 536608768 }, { "epoch": 0.16, "learning_rate": 0.000845771144278607, "loss": 0.0823, "theoretical_loss": 3.9136176702488044, "tokens_seen": 536870912 }, { "epoch": 0.16, "learning_rate": 0.0008456909003370245, "loss": 0.0789, "theoretical_loss": 3.91341590167267, "tokens_seen": 537133056 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0004424227518029511, "objective/train/docs_used": 201641, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5473430156707764, "objective/train/original_loss": 1.5473428964614868, "objective/train/theoretical_loss": 3.9132142591009975, "objective/train/tokens_used": 557855200, "objective/train/value_avg": -0.007518768310546875, "objective/train/value_loss": 0.00024131749523803592, "objective/train/value_max": -0.00021147727966308594, "objective/train/value_min": -0.64697265625, "objective/train/value_reward_corr": 0.657343727691196, "objective/train/value_std": 0.01261138916015625, "objective/train/weight_avg": 1.0005532503128052, "objective/train/weighted_lm_loss": 1.5476828813552856, "objective/train/weights_max": 1.3888442516326904, "objective/train/weights_min": 0.3710732161998749, "theoretical_loss": 3.9132142591009975, "tokens_seen": 537395200 }, { "epoch": 0.16, "learning_rate": 0.0008456106563954422, "loss": 0.0803, "theoretical_loss": 3.9132142591009975, "tokens_seen": 537395200 }, { "epoch": 0.16, "learning_rate": 0.0008455304124538597, "loss": 0.081, "theoretical_loss": 3.9130127423936907, "tokens_seen": 537657344 }, { "epoch": 0.16, "learning_rate": 0.0008454501685122772, "loss": 0.0836, "theoretical_loss": 3.9128113514108733, "tokens_seen": 537919488 }, { "epoch": 0.16, "learning_rate": 0.000845369924570695, "loss": 0.0817, "theoretical_loss": 3.9126100860128963, "tokens_seen": 538181632 }, { "epoch": 0.16, "learning_rate": 0.0008452896806291125, "loss": 0.0832, "theoretical_loss": 3.9124089460603324, "tokens_seen": 538443776 }, { "epoch": 0.16, "learning_rate": 0.0008452094366875302, "loss": 0.0823, "theoretical_loss": 3.9122079314139766, "tokens_seen": 538705920 }, { "epoch": 0.16, "learning_rate": 0.0008451291927459477, "loss": 0.0824, "theoretical_loss": 3.9120070419348463, "tokens_seen": 538968064 }, { "epoch": 0.16, "learning_rate": 0.0008450489488043653, "loss": 0.0795, "theoretical_loss": 3.9118062774841804, "tokens_seen": 539230208 }, { "epoch": 0.16, "learning_rate": 0.0008449687048627829, "loss": 0.0836, "theoretical_loss": 3.91160563792344, "tokens_seen": 539492352 }, { "epoch": 0.16, "learning_rate": 0.0008448884609212004, "loss": 0.0825, "theoretical_loss": 3.911405123114305, "tokens_seen": 539754496 }, { "epoch": 0.16, "learning_rate": 0.000844808216979618, "loss": 0.0844, "theoretical_loss": 3.9112047329186783, "tokens_seen": 540016640 }, { "epoch": 0.16, "learning_rate": 0.0008447279730380356, "loss": 0.0829, "theoretical_loss": 3.911004467198679, "tokens_seen": 540278784 }, { "epoch": 0.16, "learning_rate": 0.0008446477290964533, "loss": 0.0839, "theoretical_loss": 3.9108043258166485, "tokens_seen": 540540928 }, { "epoch": 0.16, "learning_rate": 0.0008445674851548708, "loss": 0.0813, "theoretical_loss": 3.910604308635146, "tokens_seen": 540803072 }, { "epoch": 0.16, "learning_rate": 0.0008444872412132885, "loss": 0.0796, "theoretical_loss": 3.9104044155169495, "tokens_seen": 541065216 }, { "epoch": 0.16, "learning_rate": 0.000844406997271706, "loss": 0.0852, "theoretical_loss": 3.910204646325055, "tokens_seen": 541327360 }, { "epoch": 0.16, "learning_rate": 0.0008443267533301235, "loss": 0.0845, "theoretical_loss": 3.9100050009226752, "tokens_seen": 541589504 }, { "epoch": 0.16, "learning_rate": 0.0008442465093885412, "loss": 0.0826, "theoretical_loss": 3.9098054791732406, "tokens_seen": 541851648 }, { "epoch": 0.16, "learning_rate": 0.0008441662654469587, "loss": 0.0833, "theoretical_loss": 3.909606080940399, "tokens_seen": 542113792 }, { "epoch": 0.16, "learning_rate": 0.0008440860215053764, "loss": 0.0824, "theoretical_loss": 3.909406806088013, "tokens_seen": 542375936 }, { "epoch": 0.16, "learning_rate": 0.0008440057775637939, "loss": 0.0836, "theoretical_loss": 3.909207654480162, "tokens_seen": 542638080 }, { "epoch": 0.16, "learning_rate": 0.0008439255336222116, "loss": 0.0817, "theoretical_loss": 3.9090086259811403, "tokens_seen": 542900224 }, { "epoch": 0.16, "learning_rate": 0.0008438452896806292, "loss": 0.082, "theoretical_loss": 3.908809720455457, "tokens_seen": 543162368 }, { "epoch": 0.16, "learning_rate": 0.0008437650457390467, "loss": 0.0819, "theoretical_loss": 3.908610937767836, "tokens_seen": 543424512 }, { "epoch": 0.16, "learning_rate": 0.0008436848017974643, "loss": 0.0828, "theoretical_loss": 3.9084122777832144, "tokens_seen": 543686656 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0003520216268952936, "objective/train/docs_used": 204061, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.68601655960083, "objective/train/original_loss": 1.6860167980194092, "objective/train/theoretical_loss": 3.908213740366744, "objective/train/tokens_used": 564408800, "objective/train/value_avg": -0.0084991455078125, "objective/train/value_loss": 0.0002631843963172287, "objective/train/value_max": -0.0003101825714111328, "objective/train/value_min": -0.2208251953125, "objective/train/value_reward_corr": 0.6833106642077473, "objective/train/value_std": 0.01250457763671875, "objective/train/weight_avg": 1.000474214553833, "objective/train/weighted_lm_loss": 1.6869690418243408, "objective/train/weights_max": 1.1504311561584473, "objective/train/weights_min": 0.3698042333126068, "theoretical_loss": 3.908213740366744, "tokens_seen": 543948800 }, { "epoch": 0.16, "learning_rate": 0.0008436045578558819, "loss": 0.0831, "theoretical_loss": 3.908213740366744, "tokens_seen": 543948800 }, { "epoch": 0.16, "learning_rate": 0.0008435243139142995, "loss": 0.0826, "theoretical_loss": 3.908015325383788, "tokens_seen": 544210944 }, { "epoch": 0.16, "learning_rate": 0.000843444069972717, "loss": 0.0819, "theoretical_loss": 3.907817032699924, "tokens_seen": 544473088 }, { "epoch": 0.17, "learning_rate": 0.0008433638260311347, "loss": 0.0809, "theoretical_loss": 3.9076188621809416, "tokens_seen": 544735232 }, { "epoch": 0.17, "learning_rate": 0.0008432835820895522, "loss": 0.0838, "theoretical_loss": 3.9074208136928408, "tokens_seen": 544997376 }, { "epoch": 0.17, "learning_rate": 0.0008432033381479699, "loss": 0.083, "theoretical_loss": 3.907222887101834, "tokens_seen": 545259520 }, { "epoch": 0.17, "learning_rate": 0.0008431230942063875, "loss": 0.0854, "theoretical_loss": 3.9070250822743446, "tokens_seen": 545521664 }, { "epoch": 0.17, "learning_rate": 0.000843042850264805, "loss": 0.0844, "theoretical_loss": 3.906827399077006, "tokens_seen": 545783808 }, { "epoch": 0.17, "learning_rate": 0.0008429626063232226, "loss": 0.0827, "theoretical_loss": 3.9066298373766615, "tokens_seen": 546045952 }, { "epoch": 0.17, "learning_rate": 0.0008428823623816402, "loss": 0.0831, "theoretical_loss": 3.9064323970403656, "tokens_seen": 546308096 }, { "epoch": 0.17, "learning_rate": 0.0008428021184400578, "loss": 0.0832, "theoretical_loss": 3.9062350779353787, "tokens_seen": 546570240 }, { "epoch": 0.17, "learning_rate": 0.0008427218744984754, "loss": 0.0825, "theoretical_loss": 3.906037879929174, "tokens_seen": 546832384 }, { "epoch": 0.17, "learning_rate": 0.000842641630556893, "loss": 0.0813, "theoretical_loss": 3.90584080288943, "tokens_seen": 547094528 }, { "epoch": 0.17, "learning_rate": 0.0008425613866153105, "loss": 0.083, "theoretical_loss": 3.905643846684034, "tokens_seen": 547356672 }, { "epoch": 0.17, "learning_rate": 0.0008424811426737282, "loss": 0.08, "theoretical_loss": 3.9054470111810815, "tokens_seen": 547618816 }, { "epoch": 0.17, "learning_rate": 0.0008424008987321458, "loss": 0.0811, "theoretical_loss": 3.9052502962488735, "tokens_seen": 547880960 }, { "epoch": 0.17, "learning_rate": 0.0008423206547905633, "loss": 0.0826, "theoretical_loss": 3.9050537017559197, "tokens_seen": 548143104 }, { "epoch": 0.17, "learning_rate": 0.000842240410848981, "loss": 0.0827, "theoretical_loss": 3.904857227570934, "tokens_seen": 548405248 }, { "epoch": 0.17, "learning_rate": 0.0008421601669073985, "loss": 0.0788, "theoretical_loss": 3.904660873562837, "tokens_seen": 548667392 }, { "epoch": 0.17, "learning_rate": 0.0008420799229658161, "loss": 0.0814, "theoretical_loss": 3.9044646396007545, "tokens_seen": 548929536 }, { "epoch": 0.17, "learning_rate": 0.0008419996790242337, "loss": 0.0818, "theoretical_loss": 3.9042685255540177, "tokens_seen": 549191680 }, { "epoch": 0.17, "learning_rate": 0.0008419194350826512, "loss": 0.0825, "theoretical_loss": 3.9040725312921616, "tokens_seen": 549453824 }, { "epoch": 0.17, "learning_rate": 0.0008418391911410688, "loss": 0.0825, "theoretical_loss": 3.9038766566849263, "tokens_seen": 549715968 }, { "epoch": 0.17, "learning_rate": 0.0008417589471994864, "loss": 0.0823, "theoretical_loss": 3.903680901602254, "tokens_seen": 549978112 }, { "epoch": 0.17, "learning_rate": 0.0008416787032579041, "loss": 0.0811, "theoretical_loss": 3.9034852659142913, "tokens_seen": 550240256 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0010044012451544404, "objective/train/docs_used": 206347, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6453677415847778, "objective/train/original_loss": 1.6453678607940674, "objective/train/theoretical_loss": 3.9032897494913876, "objective/train/tokens_used": 570962400, "objective/train/value_avg": -0.0096435546875, "objective/train/value_loss": 0.0002447115839459002, "objective/train/value_max": -0.00025916099548339844, "objective/train/value_min": -0.59619140625, "objective/train/value_reward_corr": 0.7088037903523504, "objective/train/value_std": 0.01555633544921875, "objective/train/weight_avg": 1.0011190176010132, "objective/train/weighted_lm_loss": 1.6466437578201294, "objective/train/weights_max": 1.6212894916534424, "objective/train/weights_min": 0.3786143362522125, "theoretical_loss": 3.9032897494913876, "tokens_seen": 550502400 }, { "epoch": 0.17, "learning_rate": 0.0008415984593163216, "loss": 0.0808, "theoretical_loss": 3.9032897494913876, "tokens_seen": 550502400 }, { "epoch": 0.17, "learning_rate": 0.0008415182153747393, "loss": 0.0809, "theoretical_loss": 3.9030943522040946, "tokens_seen": 550764544 }, { "epoch": 0.17, "learning_rate": 0.0008414379714331568, "loss": 0.0847, "theoretical_loss": 3.902899073923166, "tokens_seen": 551026688 }, { "epoch": 0.17, "learning_rate": 0.0008413577274915744, "loss": 0.0825, "theoretical_loss": 3.902703914519557, "tokens_seen": 551288832 }, { "epoch": 0.17, "learning_rate": 0.000841277483549992, "loss": 0.0825, "theoretical_loss": 3.9025088738644236, "tokens_seen": 551550976 }, { "epoch": 0.17, "learning_rate": 0.0008411972396084095, "loss": 0.0796, "theoretical_loss": 3.9023139518291243, "tokens_seen": 551813120 }, { "epoch": 0.17, "learning_rate": 0.0008411169956668272, "loss": 0.0806, "theoretical_loss": 3.902119148285216, "tokens_seen": 552075264 }, { "epoch": 0.17, "learning_rate": 0.0008410367517252447, "loss": 0.0824, "theoretical_loss": 3.9019244631044563, "tokens_seen": 552337408 }, { "epoch": 0.17, "learning_rate": 0.0008409565077836624, "loss": 0.0798, "theoretical_loss": 3.9017298961588027, "tokens_seen": 552599552 }, { "epoch": 0.17, "learning_rate": 0.00084087626384208, "loss": 0.0815, "theoretical_loss": 3.901535447320412, "tokens_seen": 552861696 }, { "epoch": 0.17, "learning_rate": 0.0008407960199004975, "loss": 0.0809, "theoretical_loss": 3.901341116461639, "tokens_seen": 553123840 }, { "epoch": 0.17, "learning_rate": 0.0008407157759589151, "loss": 0.0804, "theoretical_loss": 3.9011469034550372, "tokens_seen": 553385984 }, { "epoch": 0.17, "learning_rate": 0.0008406355320173327, "loss": 0.0816, "theoretical_loss": 3.900952808173358, "tokens_seen": 553648128 }, { "epoch": 0.17, "learning_rate": 0.0008405552880757503, "loss": 0.0834, "theoretical_loss": 3.900758830489551, "tokens_seen": 553910272 }, { "epoch": 0.17, "learning_rate": 0.0008404750441341678, "loss": 0.0827, "theoretical_loss": 3.900564970276762, "tokens_seen": 554172416 }, { "epoch": 0.17, "learning_rate": 0.0008403948001925855, "loss": 0.0809, "theoretical_loss": 3.9003712274083346, "tokens_seen": 554434560 }, { "epoch": 0.17, "learning_rate": 0.000840314556251003, "loss": 0.0822, "theoretical_loss": 3.9001776017578074, "tokens_seen": 554696704 }, { "epoch": 0.17, "learning_rate": 0.0008402343123094207, "loss": 0.0812, "theoretical_loss": 3.899984093198916, "tokens_seen": 554958848 }, { "epoch": 0.17, "learning_rate": 0.0008401540683678383, "loss": 0.0798, "theoretical_loss": 3.899790701605592, "tokens_seen": 555220992 }, { "epoch": 0.17, "learning_rate": 0.0008400738244262558, "loss": 0.0807, "theoretical_loss": 3.899597426851961, "tokens_seen": 555483136 }, { "epoch": 0.17, "learning_rate": 0.0008399935804846735, "loss": 0.078, "theoretical_loss": 3.899404268812343, "tokens_seen": 555745280 }, { "epoch": 0.17, "learning_rate": 0.000839913336543091, "loss": 0.0803, "theoretical_loss": 3.8992112273612545, "tokens_seen": 556007424 }, { "epoch": 0.17, "learning_rate": 0.0008398330926015086, "loss": 0.0821, "theoretical_loss": 3.8990183023734044, "tokens_seen": 556269568 }, { "epoch": 0.17, "learning_rate": 0.0008397528486599262, "loss": 0.0813, "theoretical_loss": 3.8988254937236952, "tokens_seen": 556531712 }, { "epoch": 0.17, "learning_rate": 0.0008396726047183438, "loss": 0.0815, "theoretical_loss": 3.8986328012872233, "tokens_seen": 556793856 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.002579332096502185, "objective/train/docs_used": 208740, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5783114433288574, "objective/train/original_loss": 1.5783112049102783, "objective/train/theoretical_loss": 3.8984402249392778, "objective/train/tokens_used": 577516000, "objective/train/value_avg": -0.00783538818359375, "objective/train/value_loss": 0.0001847637031460181, "objective/train/value_max": -0.0002868175506591797, "objective/train/value_min": -0.235107421875, "objective/train/value_reward_corr": 0.47073112766333935, "objective/train/value_std": 0.00836944580078125, "objective/train/weight_avg": 1.00266432762146, "objective/train/weighted_lm_loss": 1.5828243494033813, "objective/train/weights_max": 1.1927765607833862, "objective/train/weights_min": 0.36875197291374207, "theoretical_loss": 3.8984402249392778, "tokens_seen": 557056000 }, { "epoch": 0.17, "learning_rate": 0.0008395923607767613, "loss": 0.079, "theoretical_loss": 3.8984402249392778, "tokens_seen": 557056000 }, { "epoch": 0.17, "learning_rate": 0.000839512116835179, "loss": 0.0781, "theoretical_loss": 3.8982477645553395, "tokens_seen": 557318144 }, { "epoch": 0.17, "learning_rate": 0.0008394318728935966, "loss": 0.0796, "theoretical_loss": 3.898055420011082, "tokens_seen": 557580288 }, { "epoch": 0.17, "learning_rate": 0.0008393516289520141, "loss": 0.0779, "theoretical_loss": 3.8978631911823705, "tokens_seen": 557842432 }, { "epoch": 0.17, "learning_rate": 0.0008392713850104318, "loss": 0.0799, "theoretical_loss": 3.8976710779452612, "tokens_seen": 558104576 }, { "epoch": 0.17, "learning_rate": 0.0008391911410688493, "loss": 0.0811, "theoretical_loss": 3.8974790801760015, "tokens_seen": 558366720 }, { "epoch": 0.17, "learning_rate": 0.0008391108971272669, "loss": 0.0834, "theoretical_loss": 3.897287197751029, "tokens_seen": 558628864 }, { "epoch": 0.17, "learning_rate": 0.0008390306531856845, "loss": 0.0828, "theoretical_loss": 3.897095430546971, "tokens_seen": 558891008 }, { "epoch": 0.17, "learning_rate": 0.000838950409244102, "loss": 0.083, "theoretical_loss": 3.896903778440646, "tokens_seen": 559153152 }, { "epoch": 0.17, "learning_rate": 0.0008388701653025197, "loss": 0.0822, "theoretical_loss": 3.896712241309061, "tokens_seen": 559415296 }, { "epoch": 0.17, "learning_rate": 0.0008387899213609372, "loss": 0.083, "theoretical_loss": 3.896520819029411, "tokens_seen": 559677440 }, { "epoch": 0.17, "learning_rate": 0.0008387096774193549, "loss": 0.0816, "theoretical_loss": 3.896329511479082, "tokens_seen": 559939584 }, { "epoch": 0.17, "learning_rate": 0.0008386294334777725, "loss": 0.0789, "theoretical_loss": 3.8961383185356455, "tokens_seen": 560201728 }, { "epoch": 0.17, "learning_rate": 0.0008385491895361901, "loss": 0.0789, "theoretical_loss": 3.895947240076862, "tokens_seen": 560463872 }, { "epoch": 0.17, "learning_rate": 0.0008384689455946076, "loss": 0.0782, "theoretical_loss": 3.895756275980681, "tokens_seen": 560726016 }, { "epoch": 0.17, "learning_rate": 0.0008383887016530252, "loss": 0.0817, "theoretical_loss": 3.895565426125237, "tokens_seen": 560988160 }, { "epoch": 0.17, "learning_rate": 0.0008383084577114428, "loss": 0.0809, "theoretical_loss": 3.8953746903888513, "tokens_seen": 561250304 }, { "epoch": 0.17, "learning_rate": 0.0008382282137698603, "loss": 0.0785, "theoretical_loss": 3.895184068650033, "tokens_seen": 561512448 }, { "epoch": 0.17, "learning_rate": 0.000838147969828278, "loss": 0.0814, "theoretical_loss": 3.8949935607874764, "tokens_seen": 561774592 }, { "epoch": 0.17, "learning_rate": 0.0008380677258866955, "loss": 0.0803, "theoretical_loss": 3.8948031666800613, "tokens_seen": 562036736 }, { "epoch": 0.17, "learning_rate": 0.0008379874819451132, "loss": 0.0835, "theoretical_loss": 3.8946128862068528, "tokens_seen": 562298880 }, { "epoch": 0.17, "learning_rate": 0.0008379072380035308, "loss": 0.0802, "theoretical_loss": 3.8944227192471006, "tokens_seen": 562561024 }, { "epoch": 0.17, "learning_rate": 0.0008378269940619483, "loss": 0.0818, "theoretical_loss": 3.8942326656802395, "tokens_seen": 562823168 }, { "epoch": 0.17, "learning_rate": 0.000837746750120366, "loss": 0.0825, "theoretical_loss": 3.894042725385888, "tokens_seen": 563085312 }, { "epoch": 0.17, "learning_rate": 0.0008376665061787835, "loss": 0.0834, "theoretical_loss": 3.893852898243849, "tokens_seen": 563347456 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0005252464907243848, "objective/train/docs_used": 211161, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6151103973388672, "objective/train/original_loss": 1.6151103973388672, "objective/train/theoretical_loss": 3.8936631841341076, "objective/train/tokens_used": 584069600, "objective/train/value_avg": -0.0084381103515625, "objective/train/value_loss": 0.00027146536740474403, "objective/train/value_max": -0.0002613067626953125, "objective/train/value_min": -0.5791015625, "objective/train/value_reward_corr": 0.630790701887311, "objective/train/value_std": 0.01227569580078125, "objective/train/weight_avg": 1.0006484985351562, "objective/train/weighted_lm_loss": 1.6155797243118286, "objective/train/weights_max": 1.255659818649292, "objective/train/weights_min": 0.3694954514503479, "theoretical_loss": 3.8936631841341076, "tokens_seen": 563609600 }, { "epoch": 0.17, "learning_rate": 0.0008375862622372011, "loss": 0.0798, "theoretical_loss": 3.8936631841341076, "tokens_seen": 563609600 }, { "epoch": 0.17, "learning_rate": 0.0008375060182956187, "loss": 0.0812, "theoretical_loss": 3.893473582936833, "tokens_seen": 563871744 }, { "epoch": 0.17, "learning_rate": 0.0008374257743540363, "loss": 0.0826, "theoretical_loss": 3.8932840945323774, "tokens_seen": 564133888 }, { "epoch": 0.17, "learning_rate": 0.0008373455304124538, "loss": 0.0806, "theoretical_loss": 3.8930947188012737, "tokens_seen": 564396032 }, { "epoch": 0.17, "learning_rate": 0.0008372652864708715, "loss": 0.0783, "theoretical_loss": 3.8929054556242377, "tokens_seen": 564658176 }, { "epoch": 0.17, "learning_rate": 0.0008371850425292891, "loss": 0.0787, "theoretical_loss": 3.892716304882167, "tokens_seen": 564920320 }, { "epoch": 0.17, "learning_rate": 0.0008371047985877066, "loss": 0.0811, "theoretical_loss": 3.892527266456141, "tokens_seen": 565182464 }, { "epoch": 0.17, "learning_rate": 0.0008370245546461243, "loss": 0.0828, "theoretical_loss": 3.8923383402274174, "tokens_seen": 565444608 }, { "epoch": 0.17, "learning_rate": 0.0008369443107045418, "loss": 0.0819, "theoretical_loss": 3.8921495260774375, "tokens_seen": 565706752 }, { "epoch": 0.17, "learning_rate": 0.0008368640667629594, "loss": 0.0809, "theoretical_loss": 3.8919608238878216, "tokens_seen": 565968896 }, { "epoch": 0.17, "learning_rate": 0.000836783822821377, "loss": 0.077, "theoretical_loss": 3.891772233540369, "tokens_seen": 566231040 }, { "epoch": 0.17, "learning_rate": 0.0008367035788797946, "loss": 0.0798, "theoretical_loss": 3.8915837549170584, "tokens_seen": 566493184 }, { "epoch": 0.17, "learning_rate": 0.0008366233349382121, "loss": 0.079, "theoretical_loss": 3.89139538790005, "tokens_seen": 566755328 }, { "epoch": 0.17, "learning_rate": 0.0008365430909966297, "loss": 0.0827, "theoretical_loss": 3.8912071323716795, "tokens_seen": 567017472 }, { "epoch": 0.17, "learning_rate": 0.0008364628470550474, "loss": 0.0811, "theoretical_loss": 3.8910189882144626, "tokens_seen": 567279616 }, { "epoch": 0.17, "learning_rate": 0.000836382603113465, "loss": 0.0831, "theoretical_loss": 3.8908309553110936, "tokens_seen": 567541760 }, { "epoch": 0.17, "learning_rate": 0.0008363023591718826, "loss": 0.0802, "theoretical_loss": 3.8906430335444426, "tokens_seen": 567803904 }, { "epoch": 0.17, "learning_rate": 0.0008362221152303001, "loss": 0.0813, "theoretical_loss": 3.8904552227975593, "tokens_seen": 568066048 }, { "epoch": 0.17, "learning_rate": 0.0008361418712887178, "loss": 0.082, "theoretical_loss": 3.8902675229536685, "tokens_seen": 568328192 }, { "epoch": 0.17, "learning_rate": 0.0008360616273471353, "loss": 0.0823, "theoretical_loss": 3.8900799338961725, "tokens_seen": 568590336 }, { "epoch": 0.17, "learning_rate": 0.0008359813834055528, "loss": 0.0832, "theoretical_loss": 3.8898924555086496, "tokens_seen": 568852480 }, { "epoch": 0.17, "learning_rate": 0.0008359011394639705, "loss": 0.0809, "theoretical_loss": 3.8897050876748542, "tokens_seen": 569114624 }, { "epoch": 0.17, "learning_rate": 0.000835820895522388, "loss": 0.0821, "theoretical_loss": 3.8895178302787166, "tokens_seen": 569376768 }, { "epoch": 0.17, "learning_rate": 0.0008357406515808057, "loss": 0.0814, "theoretical_loss": 3.8893306832043404, "tokens_seen": 569638912 }, { "epoch": 0.17, "learning_rate": 0.0008356604076392233, "loss": 0.0833, "theoretical_loss": 3.8891436463360076, "tokens_seen": 569901056 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0007684892625547945, "objective/train/docs_used": 213119, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.636327862739563, "objective/train/original_loss": 1.6363275051116943, "objective/train/theoretical_loss": 3.8889567195581716, "objective/train/tokens_used": 590623200, "objective/train/value_avg": -0.00799560546875, "objective/train/value_loss": 0.00021641695639118552, "objective/train/value_max": -0.00025916099548339844, "objective/train/value_min": -0.65771484375, "objective/train/value_reward_corr": 0.6434202611001032, "objective/train/value_std": 0.0118408203125, "objective/train/weight_avg": 1.000868797302246, "objective/train/weighted_lm_loss": 1.6379215717315674, "objective/train/weights_max": 1.409537672996521, "objective/train/weights_min": 0.36900386214256287, "theoretical_loss": 3.8889567195581716, "tokens_seen": 570163200 }, { "epoch": 0.17, "learning_rate": 0.0008355801636976409, "loss": 0.0818, "theoretical_loss": 3.8889567195581716, "tokens_seen": 570163200 }, { "epoch": 0.17, "learning_rate": 0.0008354999197560584, "loss": 0.0787, "theoretical_loss": 3.8887699027554614, "tokens_seen": 570425344 }, { "epoch": 0.17, "learning_rate": 0.000835419675814476, "loss": 0.0802, "theoretical_loss": 3.8885831958126786, "tokens_seen": 570687488 }, { "epoch": 0.17, "learning_rate": 0.0008353394318728936, "loss": 0.0823, "theoretical_loss": 3.8883965986148015, "tokens_seen": 570949632 }, { "epoch": 0.17, "learning_rate": 0.0008352591879313111, "loss": 0.079, "theoretical_loss": 3.888210111046978, "tokens_seen": 571211776 }, { "epoch": 0.17, "learning_rate": 0.0008351789439897288, "loss": 0.0822, "theoretical_loss": 3.8880237329945295, "tokens_seen": 571473920 }, { "epoch": 0.17, "learning_rate": 0.0008350987000481463, "loss": 0.0836, "theoretical_loss": 3.887837464342952, "tokens_seen": 571736064 }, { "epoch": 0.17, "learning_rate": 0.0008350184561065641, "loss": 0.0825, "theoretical_loss": 3.8876513049779113, "tokens_seen": 571998208 }, { "epoch": 0.17, "learning_rate": 0.0008349382121649816, "loss": 0.0826, "theoretical_loss": 3.887465254785246, "tokens_seen": 572260352 }, { "epoch": 0.17, "learning_rate": 0.0008348579682233991, "loss": 0.0836, "theoretical_loss": 3.887279313650967, "tokens_seen": 572522496 }, { "epoch": 0.17, "learning_rate": 0.0008347777242818168, "loss": 0.0831, "theoretical_loss": 3.8870934814612546, "tokens_seen": 572784640 }, { "epoch": 0.17, "learning_rate": 0.0008346974803402343, "loss": 0.0782, "theoretical_loss": 3.886907758102461, "tokens_seen": 573046784 }, { "epoch": 0.17, "learning_rate": 0.0008346172363986519, "loss": 0.0829, "theoretical_loss": 3.8867221434611094, "tokens_seen": 573308928 }, { "epoch": 0.17, "learning_rate": 0.0008345369924570695, "loss": 0.0817, "theoretical_loss": 3.8865366374238914, "tokens_seen": 573571072 }, { "epoch": 0.17, "learning_rate": 0.0008344567485154871, "loss": 0.0816, "theoretical_loss": 3.88635123987767, "tokens_seen": 573833216 }, { "epoch": 0.17, "learning_rate": 0.0008343765045739046, "loss": 0.0823, "theoretical_loss": 3.8861659507094766, "tokens_seen": 574095360 }, { "epoch": 0.17, "learning_rate": 0.0008342962606323223, "loss": 0.0813, "theoretical_loss": 3.885980769806513, "tokens_seen": 574357504 }, { "epoch": 0.17, "learning_rate": 0.0008342160166907399, "loss": 0.0844, "theoretical_loss": 3.8857956970561487, "tokens_seen": 574619648 }, { "epoch": 0.17, "learning_rate": 0.0008341357727491574, "loss": 0.0785, "theoretical_loss": 3.8856107323459215, "tokens_seen": 574881792 }, { "epoch": 0.17, "learning_rate": 0.0008340555288075751, "loss": 0.0819, "theoretical_loss": 3.8854258755635387, "tokens_seen": 575143936 }, { "epoch": 0.17, "learning_rate": 0.0008339752848659926, "loss": 0.0829, "theoretical_loss": 3.885241126596874, "tokens_seen": 575406080 }, { "epoch": 0.17, "learning_rate": 0.0008338950409244103, "loss": 0.0799, "theoretical_loss": 3.885056485333969, "tokens_seen": 575668224 }, { "epoch": 0.17, "learning_rate": 0.0008338147969828278, "loss": 0.0828, "theoretical_loss": 3.884871951663034, "tokens_seen": 575930368 }, { "epoch": 0.17, "learning_rate": 0.0008337345530412454, "loss": 0.0778, "theoretical_loss": 3.8846875254724442, "tokens_seen": 576192512 }, { "epoch": 0.17, "learning_rate": 0.000833654309099663, "loss": 0.0797, "theoretical_loss": 3.8845032066507414, "tokens_seen": 576454656 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0010946927359327674, "objective/train/docs_used": 215374, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6169906854629517, "objective/train/original_loss": 1.6169908046722412, "objective/train/theoretical_loss": 3.884318995086635, "objective/train/tokens_used": 597176800, "objective/train/value_avg": -0.0078277587890625, "objective/train/value_loss": 0.0002228701050626114, "objective/train/value_max": -0.00022172927856445312, "objective/train/value_min": -0.49267578125, "objective/train/value_reward_corr": 0.7077948649588294, "objective/train/value_std": 0.01322174072265625, "objective/train/weight_avg": 1.0012009143829346, "objective/train/weighted_lm_loss": 1.6180263757705688, "objective/train/weights_max": 1.1417571306228638, "objective/train/weights_min": 0.3976996839046478, "theoretical_loss": 3.884318995086635, "tokens_seen": 576716800 }, { "epoch": 0.17, "learning_rate": 0.0008335740651580805, "loss": 0.0791, "theoretical_loss": 3.884318995086635, "tokens_seen": 576716800 }, { "epoch": 0.17, "learning_rate": 0.0008334938212164982, "loss": 0.0814, "theoretical_loss": 3.8841348906689985, "tokens_seen": 576978944 }, { "epoch": 0.17, "learning_rate": 0.0008334135772749158, "loss": 0.0828, "theoretical_loss": 3.8839508932868725, "tokens_seen": 577241088 }, { "epoch": 0.18, "learning_rate": 0.0008333333333333334, "loss": 0.0803, "theoretical_loss": 3.8837670028294626, "tokens_seen": 577503232 }, { "epoch": 0.18, "learning_rate": 0.0008332530893917509, "loss": 0.0822, "theoretical_loss": 3.883583219186138, "tokens_seen": 577765376 }, { "epoch": 0.18, "learning_rate": 0.0008331728454501686, "loss": 0.082, "theoretical_loss": 3.8833995422464342, "tokens_seen": 578027520 }, { "epoch": 0.18, "learning_rate": 0.0008330926015085861, "loss": 0.0818, "theoretical_loss": 3.88321597190005, "tokens_seen": 578289664 }, { "epoch": 0.18, "learning_rate": 0.0008330123575670036, "loss": 0.0822, "theoretical_loss": 3.883032508036848, "tokens_seen": 578551808 }, { "epoch": 0.18, "learning_rate": 0.0008329321136254213, "loss": 0.0793, "theoretical_loss": 3.882849150546856, "tokens_seen": 578813952 }, { "epoch": 0.18, "learning_rate": 0.0008328518696838388, "loss": 0.0829, "theoretical_loss": 3.8826658993202625, "tokens_seen": 579076096 }, { "epoch": 0.18, "learning_rate": 0.0008327716257422565, "loss": 0.0814, "theoretical_loss": 3.8824827542474214, "tokens_seen": 579338240 }, { "epoch": 0.18, "learning_rate": 0.0008326913818006741, "loss": 0.0819, "theoretical_loss": 3.882299715218848, "tokens_seen": 579600384 }, { "epoch": 0.18, "learning_rate": 0.0008326111378590917, "loss": 0.0813, "theoretical_loss": 3.8821167821252196, "tokens_seen": 579862528 }, { "epoch": 0.18, "learning_rate": 0.0008325308939175093, "loss": 0.0828, "theoretical_loss": 3.8819339548573772, "tokens_seen": 580124672 }, { "epoch": 0.18, "learning_rate": 0.0008324506499759268, "loss": 0.0815, "theoretical_loss": 3.881751233306322, "tokens_seen": 580386816 }, { "epoch": 0.18, "learning_rate": 0.0008323704060343444, "loss": 0.0814, "theoretical_loss": 3.881568617363218, "tokens_seen": 580648960 }, { "epoch": 0.18, "learning_rate": 0.000832290162092762, "loss": 0.0824, "theoretical_loss": 3.881386106919389, "tokens_seen": 580911104 }, { "epoch": 0.18, "learning_rate": 0.0008322099181511796, "loss": 0.0826, "theoretical_loss": 3.88120370186632, "tokens_seen": 581173248 }, { "epoch": 0.18, "learning_rate": 0.0008321296742095971, "loss": 0.0802, "theoretical_loss": 3.881021402095657, "tokens_seen": 581435392 }, { "epoch": 0.18, "learning_rate": 0.0008320494302680149, "loss": 0.0807, "theoretical_loss": 3.880839207499205, "tokens_seen": 581697536 }, { "epoch": 0.18, "learning_rate": 0.0008319691863264324, "loss": 0.082, "theoretical_loss": 3.880657117968931, "tokens_seen": 581959680 }, { "epoch": 0.18, "learning_rate": 0.0008318889423848499, "loss": 0.0844, "theoretical_loss": 3.880475133396959, "tokens_seen": 582221824 }, { "epoch": 0.18, "learning_rate": 0.0008318086984432676, "loss": 0.0812, "theoretical_loss": 3.8802932536755748, "tokens_seen": 582483968 }, { "epoch": 0.18, "learning_rate": 0.0008317284545016851, "loss": 0.0833, "theoretical_loss": 3.880111478697221, "tokens_seen": 582746112 }, { "epoch": 0.18, "learning_rate": 0.0008316482105601027, "loss": 0.085, "theoretical_loss": 3.8799298083545004, "tokens_seen": 583008256 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0015177363529801369, "objective/train/docs_used": 217803, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.666736364364624, "objective/train/original_loss": 1.6667362451553345, "objective/train/theoretical_loss": 3.879748242540173, "objective/train/tokens_used": 603730400, "objective/train/value_avg": -0.011322021484375, "objective/train/value_loss": 0.0003124627110082656, "objective/train/value_max": -0.00027370452880859375, "objective/train/value_min": -0.642578125, "objective/train/value_reward_corr": 0.6764778613382025, "objective/train/value_std": 0.01788330078125, "objective/train/weight_avg": 1.0016603469848633, "objective/train/weighted_lm_loss": 1.668639898300171, "objective/train/weights_max": 1.3257246017456055, "objective/train/weights_min": 0.36870554089546204, "theoretical_loss": 3.879748242540173, "tokens_seen": 583270400 }, { "epoch": 0.18, "learning_rate": 0.0008315679666185203, "loss": 0.0814, "theoretical_loss": 3.879748242540173, "tokens_seen": 583270400 }, { "epoch": 0.18, "learning_rate": 0.0008314877226769379, "loss": 0.0839, "theoretical_loss": 3.8795667811471573, "tokens_seen": 583532544 }, { "epoch": 0.18, "learning_rate": 0.0008314074787353555, "loss": 0.082, "theoretical_loss": 3.8793854240685306, "tokens_seen": 583794688 }, { "epoch": 0.18, "learning_rate": 0.000831327234793773, "loss": 0.0817, "theoretical_loss": 3.879204171197525, "tokens_seen": 584056832 }, { "epoch": 0.18, "learning_rate": 0.0008312469908521907, "loss": 0.081, "theoretical_loss": 3.879023022427533, "tokens_seen": 584318976 }, { "epoch": 0.18, "learning_rate": 0.0008311667469106083, "loss": 0.0814, "theoretical_loss": 3.878841977652101, "tokens_seen": 584581120 }, { "epoch": 0.18, "learning_rate": 0.0008310865029690259, "loss": 0.0815, "theoretical_loss": 3.8786610367649343, "tokens_seen": 584843264 }, { "epoch": 0.18, "learning_rate": 0.0008310062590274434, "loss": 0.0826, "theoretical_loss": 3.8784801996598928, "tokens_seen": 585105408 }, { "epoch": 0.18, "learning_rate": 0.0008309260150858611, "loss": 0.0819, "theoretical_loss": 3.878299466230992, "tokens_seen": 585367552 }, { "epoch": 0.18, "learning_rate": 0.0008308457711442786, "loss": 0.0808, "theoretical_loss": 3.8781188363724057, "tokens_seen": 585629696 }, { "epoch": 0.18, "learning_rate": 0.0008307655272026961, "loss": 0.0807, "theoretical_loss": 3.87793830997846, "tokens_seen": 585891840 }, { "epoch": 0.18, "learning_rate": 0.0008306852832611138, "loss": 0.0803, "theoretical_loss": 3.8777578869436384, "tokens_seen": 586153984 }, { "epoch": 0.18, "learning_rate": 0.0008306050393195313, "loss": 0.0817, "theoretical_loss": 3.8775775671625765, "tokens_seen": 586416128 }, { "epoch": 0.18, "learning_rate": 0.000830524795377949, "loss": 0.082, "theoretical_loss": 3.8773973505300674, "tokens_seen": 586678272 }, { "epoch": 0.18, "learning_rate": 0.0008304445514363666, "loss": 0.08, "theoretical_loss": 3.877217236941055, "tokens_seen": 586940416 }, { "epoch": 0.18, "learning_rate": 0.0008303643074947842, "loss": 0.0833, "theoretical_loss": 3.877037226290641, "tokens_seen": 587202560 }, { "epoch": 0.18, "learning_rate": 0.0008302840635532017, "loss": 0.0798, "theoretical_loss": 3.8768573184740767, "tokens_seen": 587464704 }, { "epoch": 0.18, "learning_rate": 0.0008302038196116194, "loss": 0.0804, "theoretical_loss": 3.87667751338677, "tokens_seen": 587726848 }, { "epoch": 0.18, "learning_rate": 0.0008301235756700369, "loss": 0.0799, "theoretical_loss": 3.8764978109242794, "tokens_seen": 587988992 }, { "epoch": 0.18, "learning_rate": 0.0008300433317284545, "loss": 0.0819, "theoretical_loss": 3.8763182109823173, "tokens_seen": 588251136 }, { "epoch": 0.18, "learning_rate": 0.0008299630877868721, "loss": 0.0761, "theoretical_loss": 3.8761387134567475, "tokens_seen": 588513280 }, { "epoch": 0.18, "learning_rate": 0.0008298828438452896, "loss": 0.081, "theoretical_loss": 3.8759593182435874, "tokens_seen": 588775424 }, { "epoch": 0.18, "learning_rate": 0.0008298025999037074, "loss": 0.0844, "theoretical_loss": 3.875780025239005, "tokens_seen": 589037568 }, { "epoch": 0.18, "learning_rate": 0.0008297223559621249, "loss": 0.0793, "theoretical_loss": 3.8756008343393202, "tokens_seen": 589299712 }, { "epoch": 0.18, "learning_rate": 0.0008296421120205425, "loss": 0.0832, "theoretical_loss": 3.8754217454410043, "tokens_seen": 589561856 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.000526767922565341, "objective/train/docs_used": 220165, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7038625478744507, "objective/train/original_loss": 1.7038625478744507, "objective/train/theoretical_loss": 3.875242758440679, "objective/train/tokens_used": 610284000, "objective/train/value_avg": -0.00890350341796875, "objective/train/value_loss": 0.0003305371792521328, "objective/train/value_max": -0.0002378225326538086, "objective/train/value_min": -0.6884765625, "objective/train/value_reward_corr": 0.6204838794811066, "objective/train/value_std": 0.0142974853515625, "objective/train/weight_avg": 1.000672459602356, "objective/train/weighted_lm_loss": 1.7044023275375366, "objective/train/weights_max": 1.3236627578735352, "objective/train/weights_min": 0.3694390654563904, "theoretical_loss": 3.875242758440679, "tokens_seen": 589824000 }, { "epoch": 0.18, "learning_rate": 0.0008295618680789601, "loss": 0.0818, "theoretical_loss": 3.875242758440679, "tokens_seen": 589824000 }, { "epoch": 0.18, "learning_rate": 0.0008294816241373776, "loss": 0.0807, "theoretical_loss": 3.875063873235117, "tokens_seen": 590086144 }, { "epoch": 0.18, "learning_rate": 0.0008294013801957952, "loss": 0.085, "theoretical_loss": 3.874885089721242, "tokens_seen": 590348288 }, { "epoch": 0.18, "learning_rate": 0.0008293211362542128, "loss": 0.0778, "theoretical_loss": 3.8747064077961264, "tokens_seen": 590610432 }, { "epoch": 0.18, "learning_rate": 0.0008292408923126304, "loss": 0.0787, "theoretical_loss": 3.874527827356994, "tokens_seen": 590872576 }, { "epoch": 0.18, "learning_rate": 0.0008291606483710479, "loss": 0.0828, "theoretical_loss": 3.8743493483012172, "tokens_seen": 591134720 }, { "epoch": 0.18, "learning_rate": 0.0008290804044294657, "loss": 0.0803, "theoretical_loss": 3.874170970526317, "tokens_seen": 591396864 }, { "epoch": 0.18, "learning_rate": 0.0008290001604878832, "loss": 0.0824, "theoretical_loss": 3.873992693929965, "tokens_seen": 591659008 }, { "epoch": 0.18, "learning_rate": 0.0008289199165463007, "loss": 0.0781, "theoretical_loss": 3.8738145184099797, "tokens_seen": 591921152 }, { "epoch": 0.18, "learning_rate": 0.0008288396726047184, "loss": 0.0793, "theoretical_loss": 3.8736364438643296, "tokens_seen": 592183296 }, { "epoch": 0.18, "learning_rate": 0.0008287594286631359, "loss": 0.0831, "theoretical_loss": 3.87345847019113, "tokens_seen": 592445440 }, { "epoch": 0.18, "learning_rate": 0.0008286791847215536, "loss": 0.0787, "theoretical_loss": 3.8732805972886446, "tokens_seen": 592707584 }, { "epoch": 0.18, "learning_rate": 0.0008285989407799711, "loss": 0.0791, "theoretical_loss": 3.873102825055285, "tokens_seen": 592969728 }, { "epoch": 0.18, "learning_rate": 0.0008285186968383887, "loss": 0.0793, "theoretical_loss": 3.87292515338961, "tokens_seen": 593231872 }, { "epoch": 0.18, "learning_rate": 0.0008284384528968063, "loss": 0.08, "theoretical_loss": 3.872747582190324, "tokens_seen": 593494016 }, { "epoch": 0.18, "learning_rate": 0.0008283582089552239, "loss": 0.0813, "theoretical_loss": 3.8725701113562794, "tokens_seen": 593756160 }, { "epoch": 0.18, "learning_rate": 0.0008282779650136415, "loss": 0.0791, "theoretical_loss": 3.8723927407864758, "tokens_seen": 594018304 }, { "epoch": 0.18, "learning_rate": 0.0008281977210720591, "loss": 0.0814, "theoretical_loss": 3.8722154703800573, "tokens_seen": 594280448 }, { "epoch": 0.18, "learning_rate": 0.0008281174771304767, "loss": 0.082, "theoretical_loss": 3.8720383000363148, "tokens_seen": 594542592 }, { "epoch": 0.18, "learning_rate": 0.0008280372331888942, "loss": 0.0807, "theoretical_loss": 3.871861229654684, "tokens_seen": 594804736 }, { "epoch": 0.18, "learning_rate": 0.0008279569892473119, "loss": 0.0776, "theoretical_loss": 3.8716842591347476, "tokens_seen": 595066880 }, { "epoch": 0.18, "learning_rate": 0.0008278767453057294, "loss": 0.0818, "theoretical_loss": 3.871507388376231, "tokens_seen": 595329024 }, { "epoch": 0.18, "learning_rate": 0.0008277965013641469, "loss": 0.0792, "theoretical_loss": 3.871330617279006, "tokens_seen": 595591168 }, { "epoch": 0.18, "learning_rate": 0.0008277162574225646, "loss": 0.0812, "theoretical_loss": 3.8711539457430897, "tokens_seen": 595853312 }, { "epoch": 0.18, "learning_rate": 0.0008276360134809821, "loss": 0.0793, "theoretical_loss": 3.87097737366864, "tokens_seen": 596115456 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0013384453486651182, "objective/train/docs_used": 222574, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5089752674102783, "objective/train/original_loss": 1.5089752674102783, "objective/train/theoretical_loss": 3.870800900955963, "objective/train/tokens_used": 616837600, "objective/train/value_avg": -0.0093841552734375, "objective/train/value_loss": 0.0004224953299853951, "objective/train/value_max": -0.00018525123596191406, "objective/train/value_min": -0.80712890625, "objective/train/value_reward_corr": 0.6443973910376418, "objective/train/value_std": 0.0171661376953125, "objective/train/weight_avg": 1.0015227794647217, "objective/train/weighted_lm_loss": 1.5108550786972046, "objective/train/weights_max": 1.870554804801941, "objective/train/weights_min": 0.3708694577217102, "theoretical_loss": 3.870800900955963, "tokens_seen": 596377600 }, { "epoch": 0.18, "learning_rate": 0.0008275557695393999, "loss": 0.079, "theoretical_loss": 3.870800900955963, "tokens_seen": 596377600 }, { "epoch": 0.18, "learning_rate": 0.0008274755255978174, "loss": 0.0812, "theoretical_loss": 3.8706245275055062, "tokens_seen": 596639744 }, { "epoch": 0.18, "learning_rate": 0.000827395281656235, "loss": 0.0796, "theoretical_loss": 3.8704482532178606, "tokens_seen": 596901888 }, { "epoch": 0.18, "learning_rate": 0.0008273150377146526, "loss": 0.0836, "theoretical_loss": 3.8702720779937607, "tokens_seen": 597164032 }, { "epoch": 0.18, "learning_rate": 0.0008272347937730702, "loss": 0.079, "theoretical_loss": 3.8700960017340833, "tokens_seen": 597426176 }, { "epoch": 0.18, "learning_rate": 0.0008271545498314877, "loss": 0.0797, "theoretical_loss": 3.8699200243398493, "tokens_seen": 597688320 }, { "epoch": 0.18, "learning_rate": 0.0008270743058899053, "loss": 0.0845, "theoretical_loss": 3.8697441457122204, "tokens_seen": 597950464 }, { "epoch": 0.18, "learning_rate": 0.0008269940619483229, "loss": 0.0819, "theoretical_loss": 3.8695683657525013, "tokens_seen": 598212608 }, { "epoch": 0.18, "learning_rate": 0.0008269138180067404, "loss": 0.0823, "theoretical_loss": 3.8693926843621376, "tokens_seen": 598474752 }, { "epoch": 0.18, "learning_rate": 0.0008268335740651582, "loss": 0.0802, "theoretical_loss": 3.8692171014427177, "tokens_seen": 598736896 }, { "epoch": 0.18, "learning_rate": 0.0008267533301235757, "loss": 0.0794, "theoretical_loss": 3.86904161689597, "tokens_seen": 598999040 }, { "epoch": 0.18, "learning_rate": 0.0008266730861819933, "loss": 0.0828, "theoretical_loss": 3.868866230623766, "tokens_seen": 599261184 }, { "epoch": 0.18, "learning_rate": 0.0008265928422404109, "loss": 0.0806, "theoretical_loss": 3.8686909425281146, "tokens_seen": 599523328 }, { "epoch": 0.18, "learning_rate": 0.0008265125982988284, "loss": 0.081, "theoretical_loss": 3.8685157525111684, "tokens_seen": 599785472 }, { "epoch": 0.18, "learning_rate": 0.000826432354357246, "loss": 0.0814, "theoretical_loss": 3.8683406604752184, "tokens_seen": 600047616 }, { "epoch": 0.18, "learning_rate": 0.0008263521104156636, "loss": 0.0827, "theoretical_loss": 3.868165666322696, "tokens_seen": 600309760 }, { "epoch": 0.18, "learning_rate": 0.0008262718664740812, "loss": 0.0826, "theoretical_loss": 3.8679907699561733, "tokens_seen": 600571904 }, { "epoch": 0.18, "learning_rate": 0.0008261916225324988, "loss": 0.0819, "theoretical_loss": 3.86781597127836, "tokens_seen": 600834048 }, { "epoch": 0.18, "learning_rate": 0.0008261113785909165, "loss": 0.0824, "theoretical_loss": 3.867641270192107, "tokens_seen": 601096192 }, { "epoch": 0.18, "learning_rate": 0.000826031134649334, "loss": 0.0831, "theoretical_loss": 3.867466666600402, "tokens_seen": 601358336 }, { "epoch": 0.18, "learning_rate": 0.0008259508907077516, "loss": 0.0815, "theoretical_loss": 3.867292160406373, "tokens_seen": 601620480 }, { "epoch": 0.18, "learning_rate": 0.0008258706467661692, "loss": 0.0796, "theoretical_loss": 3.8671177515132857, "tokens_seen": 601882624 }, { "epoch": 0.18, "learning_rate": 0.0008257904028245867, "loss": 0.0821, "theoretical_loss": 3.866943439824545, "tokens_seen": 602144768 }, { "epoch": 0.18, "learning_rate": 0.0008257101588830044, "loss": 0.0797, "theoretical_loss": 3.8667692252436914, "tokens_seen": 602406912 }, { "epoch": 0.18, "learning_rate": 0.0008256299149414219, "loss": 0.0818, "theoretical_loss": 3.8665951076744056, "tokens_seen": 602669056 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0010131277376785874, "objective/train/docs_used": 224736, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.561912178993225, "objective/train/original_loss": 1.5619122982025146, "objective/train/theoretical_loss": 3.866421087020504, "objective/train/tokens_used": 623391200, "objective/train/value_avg": -0.006183624267578125, "objective/train/value_loss": 0.00039571512024849653, "objective/train/value_max": -0.00018966197967529297, "objective/train/value_min": -0.95263671875, "objective/train/value_reward_corr": 0.493842246751972, "objective/train/value_std": 0.01300811767578125, "objective/train/weight_avg": 1.0011917352676392, "objective/train/weighted_lm_loss": 1.5638322830200195, "objective/train/weights_max": 2.0086898803710938, "objective/train/weights_min": 0.36912351846694946, "theoretical_loss": 3.866421087020504, "tokens_seen": 602931200 }, { "epoch": 0.18, "learning_rate": 0.0008255496709998395, "loss": 0.0792, "theoretical_loss": 3.866421087020504, "tokens_seen": 602931200 }, { "epoch": 0.18, "learning_rate": 0.0008254694270582571, "loss": 0.0765, "theoretical_loss": 3.8662471631859407, "tokens_seen": 603193344 }, { "epoch": 0.18, "learning_rate": 0.0008253891831166747, "loss": 0.0819, "theoretical_loss": 3.866073336074807, "tokens_seen": 603455488 }, { "epoch": 0.18, "learning_rate": 0.0008253089391750923, "loss": 0.0785, "theoretical_loss": 3.8658996055913297, "tokens_seen": 603717632 }, { "epoch": 0.18, "learning_rate": 0.0008252286952335099, "loss": 0.0833, "theoretical_loss": 3.8657259716398737, "tokens_seen": 603979776 }, { "epoch": 0.18, "learning_rate": 0.0008251484512919275, "loss": 0.0798, "theoretical_loss": 3.8655524341249388, "tokens_seen": 604241920 }, { "epoch": 0.18, "learning_rate": 0.000825068207350345, "loss": 0.0825, "theoretical_loss": 3.865378992951161, "tokens_seen": 604504064 }, { "epoch": 0.18, "learning_rate": 0.0008249879634087627, "loss": 0.0808, "theoretical_loss": 3.865205648023311, "tokens_seen": 604766208 }, { "epoch": 0.18, "learning_rate": 0.0008249077194671802, "loss": 0.0818, "theoretical_loss": 3.8650323992462963, "tokens_seen": 605028352 }, { "epoch": 0.18, "learning_rate": 0.0008248274755255978, "loss": 0.0803, "theoretical_loss": 3.8648592465251586, "tokens_seen": 605290496 }, { "epoch": 0.18, "learning_rate": 0.0008247472315840154, "loss": 0.0813, "theoretical_loss": 3.864686189765075, "tokens_seen": 605552640 }, { "epoch": 0.18, "learning_rate": 0.000824666987642433, "loss": 0.0815, "theoretical_loss": 3.864513228871357, "tokens_seen": 605814784 }, { "epoch": 0.18, "learning_rate": 0.0008245867437008507, "loss": 0.0804, "theoretical_loss": 3.8643403637494504, "tokens_seen": 606076928 }, { "epoch": 0.18, "learning_rate": 0.0008245064997592682, "loss": 0.0794, "theoretical_loss": 3.8641675943049343, "tokens_seen": 606339072 }, { "epoch": 0.18, "learning_rate": 0.0008244262558176858, "loss": 0.0827, "theoretical_loss": 3.863994920443523, "tokens_seen": 606601216 }, { "epoch": 0.18, "learning_rate": 0.0008243460118761034, "loss": 0.0811, "theoretical_loss": 3.8638223420710647, "tokens_seen": 606863360 }, { "epoch": 0.18, "learning_rate": 0.000824265767934521, "loss": 0.0786, "theoretical_loss": 3.863649859093538, "tokens_seen": 607125504 }, { "epoch": 0.18, "learning_rate": 0.0008241855239929385, "loss": 0.0793, "theoretical_loss": 3.863477471417059, "tokens_seen": 607387648 }, { "epoch": 0.18, "learning_rate": 0.0008241052800513561, "loss": 0.0818, "theoretical_loss": 3.8633051789478734, "tokens_seen": 607649792 }, { "epoch": 0.18, "learning_rate": 0.0008240250361097737, "loss": 0.0838, "theoretical_loss": 3.8631329815923605, "tokens_seen": 607911936 }, { "epoch": 0.18, "learning_rate": 0.0008239447921681912, "loss": 0.0787, "theoretical_loss": 3.862960879257032, "tokens_seen": 608174080 }, { "epoch": 0.18, "learning_rate": 0.000823864548226609, "loss": 0.0828, "theoretical_loss": 3.8627888718485313, "tokens_seen": 608436224 }, { "epoch": 0.18, "learning_rate": 0.0008237843042850265, "loss": 0.0792, "theoretical_loss": 3.862616959273635, "tokens_seen": 608698368 }, { "epoch": 0.18, "learning_rate": 0.0008237040603434442, "loss": 0.0799, "theoretical_loss": 3.8624451414392498, "tokens_seen": 608960512 }, { "epoch": 0.18, "learning_rate": 0.0008236238164018617, "loss": 0.0782, "theoretical_loss": 3.8622734182524154, "tokens_seen": 609222656 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0017172880470752716, "objective/train/docs_used": 227108, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5490598678588867, "objective/train/original_loss": 1.5490598678588867, "objective/train/theoretical_loss": 3.8621017896203007, "objective/train/tokens_used": 629944800, "objective/train/value_avg": -0.007549285888671875, "objective/train/value_loss": 0.00015352350601460785, "objective/train/value_max": -0.0002378225326538086, "objective/train/value_min": -0.423095703125, "objective/train/value_reward_corr": 0.6703752245048074, "objective/train/value_std": 0.011810302734375, "objective/train/weight_avg": 1.0017893314361572, "objective/train/weighted_lm_loss": 1.5520132780075073, "objective/train/weights_max": 1.3398975133895874, "objective/train/weights_min": 0.3707548677921295, "theoretical_loss": 3.8621017896203007, "tokens_seen": 609484800 }, { "epoch": 0.18, "learning_rate": 0.0008235435724602792, "loss": 0.0801, "theoretical_loss": 3.8621017896203007, "tokens_seen": 609484800 }, { "epoch": 0.18, "learning_rate": 0.0008234633285186969, "loss": 0.0791, "theoretical_loss": 3.8619302554502077, "tokens_seen": 609746944 }, { "epoch": 0.18, "learning_rate": 0.0008233830845771144, "loss": 0.0783, "theoretical_loss": 3.8617588156495666, "tokens_seen": 610009088 }, { "epoch": 0.18, "learning_rate": 0.000823302840635532, "loss": 0.0826, "theoretical_loss": 3.861587470125941, "tokens_seen": 610271232 }, { "epoch": 0.19, "learning_rate": 0.0008232225966939496, "loss": 0.0807, "theoretical_loss": 3.8614162187870216, "tokens_seen": 610533376 }, { "epoch": 0.19, "learning_rate": 0.0008231423527523673, "loss": 0.0801, "theoretical_loss": 3.861245061540631, "tokens_seen": 610795520 }, { "epoch": 0.19, "learning_rate": 0.0008230621088107848, "loss": 0.0803, "theoretical_loss": 3.8610739982947218, "tokens_seen": 611057664 }, { "epoch": 0.19, "learning_rate": 0.0008229818648692024, "loss": 0.077, "theoretical_loss": 3.8609030289573747, "tokens_seen": 611319808 }, { "epoch": 0.19, "learning_rate": 0.00082290162092762, "loss": 0.0822, "theoretical_loss": 3.8607321534368007, "tokens_seen": 611581952 }, { "epoch": 0.19, "learning_rate": 0.0008228213769860375, "loss": 0.077, "theoretical_loss": 3.8605613716413396, "tokens_seen": 611844096 }, { "epoch": 0.19, "learning_rate": 0.0008227411330444552, "loss": 0.082, "theoretical_loss": 3.860390683479459, "tokens_seen": 612106240 }, { "epoch": 0.19, "learning_rate": 0.0008226608891028727, "loss": 0.0775, "theoretical_loss": 3.860220088859757, "tokens_seen": 612368384 }, { "epoch": 0.19, "learning_rate": 0.0008225806451612903, "loss": 0.0771, "theoretical_loss": 3.860049587690958, "tokens_seen": 612630528 }, { "epoch": 0.19, "learning_rate": 0.0008225004012197079, "loss": 0.0796, "theoretical_loss": 3.8598791798819154, "tokens_seen": 612892672 }, { "epoch": 0.19, "learning_rate": 0.0008224201572781254, "loss": 0.0767, "theoretical_loss": 3.859708865341611, "tokens_seen": 613154816 }, { "epoch": 0.19, "learning_rate": 0.0008223399133365432, "loss": 0.0815, "theoretical_loss": 3.8595386439791532, "tokens_seen": 613416960 }, { "epoch": 0.19, "learning_rate": 0.0008222596693949607, "loss": 0.0792, "theoretical_loss": 3.859368515703778, "tokens_seen": 613679104 }, { "epoch": 0.19, "learning_rate": 0.0008221794254533783, "loss": 0.0777, "theoretical_loss": 3.859198480424849, "tokens_seen": 613941248 }, { "epoch": 0.19, "learning_rate": 0.0008220991815117959, "loss": 0.0783, "theoretical_loss": 3.859028538051856, "tokens_seen": 614203392 }, { "epoch": 0.19, "learning_rate": 0.0008220189375702135, "loss": 0.0797, "theoretical_loss": 3.858858688494416, "tokens_seen": 614465536 }, { "epoch": 0.19, "learning_rate": 0.000821938693628631, "loss": 0.0789, "theoretical_loss": 3.8586889316622726, "tokens_seen": 614727680 }, { "epoch": 0.19, "learning_rate": 0.0008218584496870486, "loss": 0.0758, "theoretical_loss": 3.8585192674652955, "tokens_seen": 614989824 }, { "epoch": 0.19, "learning_rate": 0.0008217782057454662, "loss": 0.0785, "theoretical_loss": 3.8583496958134793, "tokens_seen": 615251968 }, { "epoch": 0.19, "learning_rate": 0.0008216979618038837, "loss": 0.0766, "theoretical_loss": 3.8581802166169457, "tokens_seen": 615514112 }, { "epoch": 0.19, "learning_rate": 0.0008216177178623015, "loss": 0.0798, "theoretical_loss": 3.8580108297859415, "tokens_seen": 615776256 }, { "epoch": 0.19, "objective/train/advantage_avg": -0.00047024516970850527, "objective/train/docs_used": 229528, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.600695252418518, "objective/train/original_loss": 1.6006953716278076, "objective/train/theoretical_loss": 3.857841535230839, "objective/train/tokens_used": 636498400, "objective/train/value_avg": -0.01091766357421875, "objective/train/value_loss": 0.00041529766167514026, "objective/train/value_max": -0.0002033710479736328, "objective/train/value_min": -0.474853515625, "objective/train/value_reward_corr": 0.8363627909970652, "objective/train/value_std": 0.0230560302734375, "objective/train/weight_avg": 0.9997208714485168, "objective/train/weighted_lm_loss": 1.6006137132644653, "objective/train/weights_max": 1.2145042419433594, "objective/train/weights_min": 0.23296229541301727, "theoretical_loss": 3.857841535230839, "tokens_seen": 616038400 }, { "epoch": 0.19, "learning_rate": 0.000821537473920719, "loss": 0.0775, "theoretical_loss": 3.857841535230839, "tokens_seen": 616038400 }, { "epoch": 0.19, "learning_rate": 0.0008214572299791366, "loss": 0.0789, "theoretical_loss": 3.8576723328621347, "tokens_seen": 616300544 }, { "epoch": 0.19, "learning_rate": 0.0008213769860375542, "loss": 0.0791, "theoretical_loss": 3.8575032225904513, "tokens_seen": 616562688 }, { "epoch": 0.19, "learning_rate": 0.0008212967420959717, "loss": 0.0773, "theoretical_loss": 3.8573342043265346, "tokens_seen": 616824832 }, { "epoch": 0.19, "learning_rate": 0.0008212164981543894, "loss": 0.083, "theoretical_loss": 3.857165277981256, "tokens_seen": 617086976 }, { "epoch": 0.19, "learning_rate": 0.0008211362542128069, "loss": 0.0808, "theoretical_loss": 3.8569964434656105, "tokens_seen": 617349120 }, { "epoch": 0.19, "learning_rate": 0.0008210560102712245, "loss": 0.0807, "theoretical_loss": 3.856827700690718, "tokens_seen": 617611264 }, { "epoch": 0.19, "learning_rate": 0.0008209757663296422, "loss": 0.0794, "theoretical_loss": 3.8566590495678192, "tokens_seen": 617873408 }, { "epoch": 0.19, "learning_rate": 0.0008208955223880598, "loss": 0.0816, "theoretical_loss": 3.856490490008282, "tokens_seen": 618135552 }, { "epoch": 0.19, "learning_rate": 0.0008208152784464773, "loss": 0.0807, "theoretical_loss": 3.856322021923595, "tokens_seen": 618397696 }, { "epoch": 0.19, "learning_rate": 0.000820735034504895, "loss": 0.0798, "theoretical_loss": 3.8561536452253713, "tokens_seen": 618659840 }, { "epoch": 0.19, "learning_rate": 0.0008206547905633125, "loss": 0.0769, "theoretical_loss": 3.855985359825346, "tokens_seen": 618921984 }, { "epoch": 0.19, "learning_rate": 0.00082057454662173, "loss": 0.0824, "theoretical_loss": 3.855817165635377, "tokens_seen": 619184128 }, { "epoch": 0.19, "learning_rate": 0.0008204943026801477, "loss": 0.0818, "theoretical_loss": 3.8556490625674447, "tokens_seen": 619446272 }, { "epoch": 0.19, "learning_rate": 0.0008204140587385652, "loss": 0.0812, "theoretical_loss": 3.855481050533651, "tokens_seen": 619708416 }, { "epoch": 0.19, "learning_rate": 0.0008203338147969828, "loss": 0.0813, "theoretical_loss": 3.8553131294462206, "tokens_seen": 619970560 }, { "epoch": 0.19, "learning_rate": 0.0008202535708554004, "loss": 0.0813, "theoretical_loss": 3.8551452992175, "tokens_seen": 620232704 }, { "epoch": 0.19, "learning_rate": 0.0008201733269138181, "loss": 0.0813, "theoretical_loss": 3.8549775597599556, "tokens_seen": 620494848 }, { "epoch": 0.19, "learning_rate": 0.0008200930829722356, "loss": 0.0786, "theoretical_loss": 3.8548099109861775, "tokens_seen": 620756992 }, { "epoch": 0.19, "learning_rate": 0.0008200128390306532, "loss": 0.0808, "theoretical_loss": 3.8546423528088747, "tokens_seen": 621019136 }, { "epoch": 0.19, "learning_rate": 0.0008199325950890708, "loss": 0.0801, "theoretical_loss": 3.8544748851408777, "tokens_seen": 621281280 }, { "epoch": 0.19, "learning_rate": 0.0008198523511474884, "loss": 0.078, "theoretical_loss": 3.8543075078951388, "tokens_seen": 621543424 }, { "epoch": 0.19, "learning_rate": 0.000819772107205906, "loss": 0.078, "theoretical_loss": 3.8541402209847284, "tokens_seen": 621805568 }, { "epoch": 0.19, "learning_rate": 0.0008196918632643235, "loss": 0.0815, "theoretical_loss": 3.8539730243228387, "tokens_seen": 622067712 }, { "epoch": 0.19, "learning_rate": 0.0008196116193227412, "loss": 0.0774, "theoretical_loss": 3.8538059178227817, "tokens_seen": 622329856 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.00011773208098020405, "objective/train/docs_used": 231894, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6076971292495728, "objective/train/original_loss": 1.6076970100402832, "objective/train/theoretical_loss": 3.8536389013979893, "objective/train/tokens_used": 643052000, "objective/train/value_avg": -0.0093994140625, "objective/train/value_loss": 0.00031335835228674114, "objective/train/value_max": -0.00016868114471435547, "objective/train/value_min": -0.638671875, "objective/train/value_reward_corr": 0.6932073575502529, "objective/train/value_std": 0.01538848876953125, "objective/train/weight_avg": 1.000260829925537, "objective/train/weighted_lm_loss": 1.6074447631835938, "objective/train/weights_max": 1.6733942031860352, "objective/train/weights_min": 0.37257105112075806, "theoretical_loss": 3.8536389013979893, "tokens_seen": 622592000 }, { "epoch": 0.19, "learning_rate": 0.0008195313753811587, "loss": 0.0831, "theoretical_loss": 3.8536389013979893, "tokens_seen": 622592000 }, { "epoch": 0.19, "learning_rate": 0.0008194511314395762, "loss": 0.0791, "theoretical_loss": 3.8534719749620114, "tokens_seen": 622854144 }, { "epoch": 0.19, "learning_rate": 0.000819370887497994, "loss": 0.0785, "theoretical_loss": 3.8533051384285195, "tokens_seen": 623116288 }, { "epoch": 0.19, "learning_rate": 0.0008192906435564115, "loss": 0.0811, "theoretical_loss": 3.853138391711303, "tokens_seen": 623378432 }, { "epoch": 0.19, "learning_rate": 0.0008192103996148291, "loss": 0.079, "theoretical_loss": 3.852971734724269, "tokens_seen": 623640576 }, { "epoch": 0.19, "learning_rate": 0.0008191301556732467, "loss": 0.0785, "theoretical_loss": 3.8528051673814456, "tokens_seen": 623902720 }, { "epoch": 0.19, "learning_rate": 0.0008190499117316643, "loss": 0.0806, "theoretical_loss": 3.8526386895969775, "tokens_seen": 624164864 }, { "epoch": 0.19, "learning_rate": 0.0008189696677900818, "loss": 0.0821, "theoretical_loss": 3.8524723012851294, "tokens_seen": 624427008 }, { "epoch": 0.19, "learning_rate": 0.0008188894238484994, "loss": 0.0807, "theoretical_loss": 3.852306002360282, "tokens_seen": 624689152 }, { "epoch": 0.19, "learning_rate": 0.000818809179906917, "loss": 0.0811, "theoretical_loss": 3.852139792736936, "tokens_seen": 624951296 }, { "epoch": 0.19, "learning_rate": 0.0008187289359653345, "loss": 0.0784, "theoretical_loss": 3.8519736723297067, "tokens_seen": 625213440 }, { "epoch": 0.19, "learning_rate": 0.0008186486920237523, "loss": 0.0789, "theoretical_loss": 3.8518076410533304, "tokens_seen": 625475584 }, { "epoch": 0.19, "learning_rate": 0.0008185684480821698, "loss": 0.0786, "theoretical_loss": 3.8516416988226574, "tokens_seen": 625737728 }, { "epoch": 0.19, "learning_rate": 0.0008184882041405875, "loss": 0.079, "theoretical_loss": 3.851475845552658, "tokens_seen": 625999872 }, { "epoch": 0.19, "learning_rate": 0.000818407960199005, "loss": 0.0818, "theoretical_loss": 3.8513100811584158, "tokens_seen": 626262016 }, { "epoch": 0.19, "learning_rate": 0.0008183277162574225, "loss": 0.0815, "theoretical_loss": 3.851144405555134, "tokens_seen": 626524160 }, { "epoch": 0.19, "learning_rate": 0.0008182474723158402, "loss": 0.0787, "theoretical_loss": 3.85097881865813, "tokens_seen": 626786304 }, { "epoch": 0.19, "learning_rate": 0.0008181672283742577, "loss": 0.0807, "theoretical_loss": 3.850813320382839, "tokens_seen": 627048448 }, { "epoch": 0.19, "learning_rate": 0.0008180869844326753, "loss": 0.0809, "theoretical_loss": 3.8506479106448115, "tokens_seen": 627310592 }, { "epoch": 0.19, "learning_rate": 0.000818006740491093, "loss": 0.0823, "theoretical_loss": 3.8504825893597134, "tokens_seen": 627572736 }, { "epoch": 0.19, "learning_rate": 0.0008179264965495106, "loss": 0.0802, "theoretical_loss": 3.850317356443326, "tokens_seen": 627834880 }, { "epoch": 0.19, "learning_rate": 0.0008178462526079281, "loss": 0.0787, "theoretical_loss": 3.8501522118115465, "tokens_seen": 628097024 }, { "epoch": 0.19, "learning_rate": 0.0008177660086663458, "loss": 0.0824, "theoretical_loss": 3.8499871553803873, "tokens_seen": 628359168 }, { "epoch": 0.19, "learning_rate": 0.0008176857647247633, "loss": 0.0804, "theoretical_loss": 3.8498221870659743, "tokens_seen": 628621312 }, { "epoch": 0.19, "learning_rate": 0.0008176055207831808, "loss": 0.0761, "theoretical_loss": 3.8496573067845503, "tokens_seen": 628883456 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0010866678785532713, "objective/train/docs_used": 234355, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5178707838058472, "objective/train/original_loss": 1.5178709030151367, "objective/train/theoretical_loss": 3.8494925144524705, "objective/train/tokens_used": 649605600, "objective/train/value_avg": -0.0084991455078125, "objective/train/value_loss": 0.0005936273955740035, "objective/train/value_max": -0.00015115737915039062, "objective/train/value_min": -0.94189453125, "objective/train/value_reward_corr": 0.5752126539490294, "objective/train/value_std": 0.0167694091796875, "objective/train/weight_avg": 1.001339316368103, "objective/train/weighted_lm_loss": 1.5189374685287476, "objective/train/weights_max": 1.8356927633285522, "objective/train/weights_min": 0.36808282136917114, "theoretical_loss": 3.8494925144524705, "tokens_seen": 629145600 }, { "epoch": 0.19, "learning_rate": 0.0008175252768415985, "loss": 0.081, "theoretical_loss": 3.8494925144524705, "tokens_seen": 629145600 }, { "epoch": 0.19, "learning_rate": 0.000817445032900016, "loss": 0.0801, "theoretical_loss": 3.849327809986206, "tokens_seen": 629407744 }, { "epoch": 0.19, "learning_rate": 0.0008173647889584337, "loss": 0.0767, "theoretical_loss": 3.8491631933023407, "tokens_seen": 629669888 }, { "epoch": 0.19, "learning_rate": 0.0008172845450168512, "loss": 0.0814, "theoretical_loss": 3.8489986643175733, "tokens_seen": 629932032 }, { "epoch": 0.19, "learning_rate": 0.0008172043010752689, "loss": 0.08, "theoretical_loss": 3.8488342229487156, "tokens_seen": 630194176 }, { "epoch": 0.19, "learning_rate": 0.0008171240571336865, "loss": 0.0819, "theoretical_loss": 3.8486698691126935, "tokens_seen": 630456320 }, { "epoch": 0.19, "learning_rate": 0.000817043813192104, "loss": 0.0806, "theoretical_loss": 3.8485056027265454, "tokens_seen": 630718464 }, { "epoch": 0.19, "learning_rate": 0.0008169635692505216, "loss": 0.0798, "theoretical_loss": 3.848341423707423, "tokens_seen": 630980608 }, { "epoch": 0.19, "learning_rate": 0.0008168833253089392, "loss": 0.0788, "theoretical_loss": 3.8481773319725914, "tokens_seen": 631242752 }, { "epoch": 0.19, "learning_rate": 0.0008168030813673568, "loss": 0.0829, "theoretical_loss": 3.8480133274394275, "tokens_seen": 631504896 }, { "epoch": 0.19, "learning_rate": 0.0008167228374257743, "loss": 0.076, "theoretical_loss": 3.8478494100254217, "tokens_seen": 631767040 }, { "epoch": 0.19, "learning_rate": 0.000816642593484192, "loss": 0.0823, "theoretical_loss": 3.847685579648176, "tokens_seen": 632029184 }, { "epoch": 0.19, "learning_rate": 0.0008165623495426095, "loss": 0.0821, "theoretical_loss": 3.847521836225404, "tokens_seen": 632291328 }, { "epoch": 0.19, "learning_rate": 0.000816482105601027, "loss": 0.0825, "theoretical_loss": 3.8473581796749317, "tokens_seen": 632553472 }, { "epoch": 0.19, "learning_rate": 0.0008164018616594448, "loss": 0.077, "theoretical_loss": 3.8471946099146983, "tokens_seen": 632815616 }, { "epoch": 0.19, "learning_rate": 0.0008163216177178623, "loss": 0.079, "theoretical_loss": 3.847031126862751, "tokens_seen": 633077760 }, { "epoch": 0.19, "learning_rate": 0.0008162413737762799, "loss": 0.0788, "theoretical_loss": 3.8468677304372507, "tokens_seen": 633339904 }, { "epoch": 0.19, "learning_rate": 0.0008161611298346975, "loss": 0.0777, "theoretical_loss": 3.8467044205564704, "tokens_seen": 633602048 }, { "epoch": 0.19, "learning_rate": 0.0008160808858931151, "loss": 0.0799, "theoretical_loss": 3.846541197138791, "tokens_seen": 633864192 }, { "epoch": 0.19, "learning_rate": 0.0008160006419515327, "loss": 0.0836, "theoretical_loss": 3.8463780601027056, "tokens_seen": 634126336 }, { "epoch": 0.19, "learning_rate": 0.0008159203980099502, "loss": 0.0784, "theoretical_loss": 3.846215009366819, "tokens_seen": 634388480 }, { "epoch": 0.19, "learning_rate": 0.0008158401540683678, "loss": 0.0809, "theoretical_loss": 3.846052044849843, "tokens_seen": 634650624 }, { "epoch": 0.19, "learning_rate": 0.0008157599101267855, "loss": 0.0793, "theoretical_loss": 3.845889166470604, "tokens_seen": 634912768 }, { "epoch": 0.19, "learning_rate": 0.0008156796661852031, "loss": 0.08, "theoretical_loss": 3.8457263741480343, "tokens_seen": 635174912 }, { "epoch": 0.19, "learning_rate": 0.0008155994222436206, "loss": 0.0787, "theoretical_loss": 3.845563667801178, "tokens_seen": 635437056 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0005161234876140952, "objective/train/docs_used": 236526, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5930426120758057, "objective/train/original_loss": 1.5930426120758057, "objective/train/theoretical_loss": 3.8454010473491884, "objective/train/tokens_used": 656159200, "objective/train/value_avg": -0.007137298583984375, "objective/train/value_loss": 0.00014894510968588293, "objective/train/value_max": -0.00016224384307861328, "objective/train/value_min": -0.2247314453125, "objective/train/value_reward_corr": 0.6557688653083844, "objective/train/value_std": 0.01020050048828125, "objective/train/weight_avg": 1.0005874633789062, "objective/train/weighted_lm_loss": 1.5939151048660278, "objective/train/weights_max": 1.1805914640426636, "objective/train/weights_min": 0.3701472282409668, "theoretical_loss": 3.8454010473491884, "tokens_seen": 635699200 }, { "epoch": 0.19, "learning_rate": 0.0008155191783020383, "loss": 0.0807, "theoretical_loss": 3.8454010473491884, "tokens_seen": 635699200 }, { "epoch": 0.19, "learning_rate": 0.0008154389343604558, "loss": 0.0791, "theoretical_loss": 3.845238512711327, "tokens_seen": 635961344 }, { "epoch": 0.19, "learning_rate": 0.0008153586904188733, "loss": 0.0815, "theoretical_loss": 3.845076063806966, "tokens_seen": 636223488 }, { "epoch": 0.19, "learning_rate": 0.000815278446477291, "loss": 0.0828, "theoretical_loss": 3.844913700555586, "tokens_seen": 636485632 }, { "epoch": 0.19, "learning_rate": 0.0008151982025357085, "loss": 0.0779, "theoretical_loss": 3.8447514228767763, "tokens_seen": 636747776 }, { "epoch": 0.19, "learning_rate": 0.0008151179585941261, "loss": 0.0827, "theoretical_loss": 3.844589230690234, "tokens_seen": 637009920 }, { "epoch": 0.19, "learning_rate": 0.0008150377146525437, "loss": 0.0793, "theoretical_loss": 3.844427123915766, "tokens_seen": 637272064 }, { "epoch": 0.19, "learning_rate": 0.0008149574707109614, "loss": 0.0779, "theoretical_loss": 3.8442651024732863, "tokens_seen": 637534208 }, { "epoch": 0.19, "learning_rate": 0.000814877226769379, "loss": 0.078, "theoretical_loss": 3.8441031662828173, "tokens_seen": 637796352 }, { "epoch": 0.19, "learning_rate": 0.0008147969828277966, "loss": 0.0796, "theoretical_loss": 3.843941315264489, "tokens_seen": 638058496 }, { "epoch": 0.19, "learning_rate": 0.0008147167388862141, "loss": 0.0786, "theoretical_loss": 3.8437795493385387, "tokens_seen": 638320640 }, { "epoch": 0.19, "learning_rate": 0.0008146364949446317, "loss": 0.0812, "theoretical_loss": 3.8436178684253126, "tokens_seen": 638582784 }, { "epoch": 0.19, "learning_rate": 0.0008145562510030493, "loss": 0.0816, "theoretical_loss": 3.843456272445262, "tokens_seen": 638844928 }, { "epoch": 0.19, "learning_rate": 0.0008144760070614668, "loss": 0.0778, "theoretical_loss": 3.8432947613189468, "tokens_seen": 639107072 }, { "epoch": 0.19, "learning_rate": 0.0008143957631198845, "loss": 0.0821, "theoretical_loss": 3.8431333349670336, "tokens_seen": 639369216 }, { "epoch": 0.19, "learning_rate": 0.000814315519178302, "loss": 0.0785, "theoretical_loss": 3.842971993310294, "tokens_seen": 639631360 }, { "epoch": 0.19, "learning_rate": 0.0008142352752367197, "loss": 0.0789, "theoretical_loss": 3.8428107362696085, "tokens_seen": 639893504 }, { "epoch": 0.19, "learning_rate": 0.0008141550312951373, "loss": 0.0778, "theoretical_loss": 3.842649563765962, "tokens_seen": 640155648 }, { "epoch": 0.19, "learning_rate": 0.0008140747873535548, "loss": 0.0815, "theoretical_loss": 3.8424884757204474, "tokens_seen": 640417792 }, { "epoch": 0.19, "learning_rate": 0.0008139945434119724, "loss": 0.0797, "theoretical_loss": 3.8423274720542606, "tokens_seen": 640679936 }, { "epoch": 0.19, "learning_rate": 0.00081391429947039, "loss": 0.0784, "theoretical_loss": 3.842166552688706, "tokens_seen": 640942080 }, { "epoch": 0.19, "learning_rate": 0.0008138340555288076, "loss": 0.0803, "theoretical_loss": 3.8420057175451934, "tokens_seen": 641204224 }, { "epoch": 0.19, "learning_rate": 0.0008137538115872251, "loss": 0.0807, "theoretical_loss": 3.841844966545236, "tokens_seen": 641466368 }, { "epoch": 0.19, "learning_rate": 0.0008136735676456428, "loss": 0.08, "theoretical_loss": 3.841684299610453, "tokens_seen": 641728512 }, { "epoch": 0.19, "learning_rate": 0.0008135933237040603, "loss": 0.0785, "theoretical_loss": 3.8415237166625698, "tokens_seen": 641990656 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.00029344300855882466, "objective/train/docs_used": 239059, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.705520510673523, "objective/train/original_loss": 1.7055203914642334, "objective/train/theoretical_loss": 3.8413632176234147, "objective/train/tokens_used": 662712800, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.0002510190533939749, "objective/train/value_max": -0.00013768672943115234, "objective/train/value_min": -0.431396484375, "objective/train/value_reward_corr": 0.6089659714411729, "objective/train/value_std": 0.0107879638671875, "objective/train/weight_avg": 1.000407338142395, "objective/train/weighted_lm_loss": 1.704552412033081, "objective/train/weights_max": 1.1669175624847412, "objective/train/weights_min": 0.24382223188877106, "theoretical_loss": 3.8413632176234147, "tokens_seen": 642252800 }, { "epoch": 0.19, "learning_rate": 0.000813513079762478, "loss": 0.0791, "theoretical_loss": 3.8413632176234147, "tokens_seen": 642252800 }, { "epoch": 0.19, "learning_rate": 0.0008134328358208956, "loss": 0.0832, "theoretical_loss": 3.8412028024149225, "tokens_seen": 642514944 }, { "epoch": 0.19, "learning_rate": 0.0008133525918793131, "loss": 0.0788, "theoretical_loss": 3.841042470959131, "tokens_seen": 642777088 }, { "epoch": 0.19, "learning_rate": 0.0008132723479377308, "loss": 0.0788, "theoretical_loss": 3.840882223178183, "tokens_seen": 643039232 }, { "epoch": 0.19, "learning_rate": 0.0008131921039961483, "loss": 0.0809, "theoretical_loss": 3.8407220589943254, "tokens_seen": 643301376 }, { "epoch": 0.2, "learning_rate": 0.0008131118600545659, "loss": 0.0775, "theoretical_loss": 3.8405619783299083, "tokens_seen": 643563520 }, { "epoch": 0.2, "learning_rate": 0.0008130316161129835, "loss": 0.0781, "theoretical_loss": 3.8404019811073864, "tokens_seen": 643825664 }, { "epoch": 0.2, "learning_rate": 0.000812951372171401, "loss": 0.0784, "theoretical_loss": 3.840242067249317, "tokens_seen": 644087808 }, { "epoch": 0.2, "learning_rate": 0.0008128711282298186, "loss": 0.0782, "theoretical_loss": 3.840082236678362, "tokens_seen": 644349952 }, { "epoch": 0.2, "learning_rate": 0.0008127908842882363, "loss": 0.0797, "theoretical_loss": 3.8399224893172854, "tokens_seen": 644612096 }, { "epoch": 0.2, "learning_rate": 0.0008127106403466539, "loss": 0.0789, "theoretical_loss": 3.839762825088955, "tokens_seen": 644874240 }, { "epoch": 0.2, "learning_rate": 0.0008126303964050714, "loss": 0.0828, "theoretical_loss": 3.8396032439163394, "tokens_seen": 645136384 }, { "epoch": 0.2, "learning_rate": 0.0008125501524634891, "loss": 0.0803, "theoretical_loss": 3.8394437457225132, "tokens_seen": 645398528 }, { "epoch": 0.2, "learning_rate": 0.0008124699085219066, "loss": 0.0831, "theoretical_loss": 3.839284330430651, "tokens_seen": 645660672 }, { "epoch": 0.2, "learning_rate": 0.0008123896645803241, "loss": 0.0806, "theoretical_loss": 3.8391249979640305, "tokens_seen": 645922816 }, { "epoch": 0.2, "learning_rate": 0.0008123094206387418, "loss": 0.0798, "theoretical_loss": 3.8389657482460313, "tokens_seen": 646184960 }, { "epoch": 0.2, "learning_rate": 0.0008122291766971593, "loss": 0.0771, "theoretical_loss": 3.838806581200134, "tokens_seen": 646447104 }, { "epoch": 0.2, "learning_rate": 0.000812148932755577, "loss": 0.0806, "theoretical_loss": 3.838647496749924, "tokens_seen": 646709248 }, { "epoch": 0.2, "learning_rate": 0.0008120686888139945, "loss": 0.0815, "theoretical_loss": 3.8384884948190847, "tokens_seen": 646971392 }, { "epoch": 0.2, "learning_rate": 0.0008119884448724122, "loss": 0.0805, "theoretical_loss": 3.838329575331403, "tokens_seen": 647233536 }, { "epoch": 0.2, "learning_rate": 0.0008119082009308298, "loss": 0.0795, "theoretical_loss": 3.8381707382107657, "tokens_seen": 647495680 }, { "epoch": 0.2, "learning_rate": 0.0008118279569892473, "loss": 0.0834, "theoretical_loss": 3.838011983381162, "tokens_seen": 647757824 }, { "epoch": 0.2, "learning_rate": 0.0008117477130476649, "loss": 0.0788, "theoretical_loss": 3.8378533107666817, "tokens_seen": 648019968 }, { "epoch": 0.2, "learning_rate": 0.0008116674691060825, "loss": 0.0831, "theoretical_loss": 3.8376947202915144, "tokens_seen": 648282112 }, { "epoch": 0.2, "learning_rate": 0.0008115872251645001, "loss": 0.0797, "theoretical_loss": 3.8375362118799505, "tokens_seen": 648544256 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.0012510496890172362, "objective/train/docs_used": 241544, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5836241245269775, "objective/train/original_loss": 1.5836241245269775, "objective/train/theoretical_loss": 3.8373777854563813, "objective/train/tokens_used": 669266400, "objective/train/value_avg": -0.00818634033203125, "objective/train/value_loss": 0.00038118340307846665, "objective/train/value_max": -0.0001150369644165039, "objective/train/value_min": -0.9169921875, "objective/train/value_reward_corr": 0.6984226450131933, "objective/train/value_std": 0.016143798828125, "objective/train/weight_avg": 1.0014175176620483, "objective/train/weighted_lm_loss": 1.5856133699417114, "objective/train/weights_max": 1.2598052024841309, "objective/train/weights_min": 0.25575754046440125, "theoretical_loss": 3.8373777854563813, "tokens_seen": 648806400 }, { "epoch": 0.2, "learning_rate": 0.0008115069812229176, "loss": 0.0792, "theoretical_loss": 3.8373777854563813, "tokens_seen": 648806400 }, { "epoch": 0.2, "learning_rate": 0.0008114267372813353, "loss": 0.0814, "theoretical_loss": 3.837219440945298, "tokens_seen": 649068544 }, { "epoch": 0.2, "learning_rate": 0.0008113464933397528, "loss": 0.0798, "theoretical_loss": 3.8370611782712922, "tokens_seen": 649330688 }, { "epoch": 0.2, "learning_rate": 0.0008112662493981705, "loss": 0.0821, "theoretical_loss": 3.8369029973590543, "tokens_seen": 649592832 }, { "epoch": 0.2, "learning_rate": 0.0008111860054565881, "loss": 0.0793, "theoretical_loss": 3.836744898133376, "tokens_seen": 649854976 }, { "epoch": 0.2, "learning_rate": 0.0008111057615150056, "loss": 0.0786, "theoretical_loss": 3.8365868805191456, "tokens_seen": 650117120 }, { "epoch": 0.2, "learning_rate": 0.0008110255175734233, "loss": 0.0798, "theoretical_loss": 3.836428944441354, "tokens_seen": 650379264 }, { "epoch": 0.2, "learning_rate": 0.0008109452736318408, "loss": 0.0803, "theoretical_loss": 3.8362710898250896, "tokens_seen": 650641408 }, { "epoch": 0.2, "learning_rate": 0.0008108650296902584, "loss": 0.0817, "theoretical_loss": 3.83611331659554, "tokens_seen": 650903552 }, { "epoch": 0.2, "learning_rate": 0.000810784785748676, "loss": 0.0762, "theoretical_loss": 3.8359556246779913, "tokens_seen": 651165696 }, { "epoch": 0.2, "learning_rate": 0.0008107045418070936, "loss": 0.0826, "theoretical_loss": 3.8357980139978283, "tokens_seen": 651427840 }, { "epoch": 0.2, "learning_rate": 0.0008106242978655111, "loss": 0.0823, "theoretical_loss": 3.8356404844805354, "tokens_seen": 651689984 }, { "epoch": 0.2, "learning_rate": 0.0008105440539239288, "loss": 0.0812, "theoretical_loss": 3.835483036051694, "tokens_seen": 651952128 }, { "epoch": 0.2, "learning_rate": 0.0008104638099823464, "loss": 0.0799, "theoretical_loss": 3.835325668636983, "tokens_seen": 652214272 }, { "epoch": 0.2, "learning_rate": 0.0008103835660407639, "loss": 0.0815, "theoretical_loss": 3.8351683821621814, "tokens_seen": 652476416 }, { "epoch": 0.2, "learning_rate": 0.0008103033220991816, "loss": 0.0816, "theoretical_loss": 3.8350111765531647, "tokens_seen": 652738560 }, { "epoch": 0.2, "learning_rate": 0.0008102230781575991, "loss": 0.0796, "theoretical_loss": 3.834854051735906, "tokens_seen": 653000704 }, { "epoch": 0.2, "learning_rate": 0.0008101428342160167, "loss": 0.0823, "theoretical_loss": 3.8346970076364757, "tokens_seen": 653262848 }, { "epoch": 0.2, "learning_rate": 0.0008100625902744343, "loss": 0.0795, "theoretical_loss": 3.8345400441810424, "tokens_seen": 653524992 }, { "epoch": 0.2, "learning_rate": 0.0008099823463328518, "loss": 0.0816, "theoretical_loss": 3.8343831612958703, "tokens_seen": 653787136 }, { "epoch": 0.2, "learning_rate": 0.0008099021023912694, "loss": 0.0815, "theoretical_loss": 3.834226358907322, "tokens_seen": 654049280 }, { "epoch": 0.2, "learning_rate": 0.000809821858449687, "loss": 0.0816, "theoretical_loss": 3.8340696369418565, "tokens_seen": 654311424 }, { "epoch": 0.2, "learning_rate": 0.0008097416145081047, "loss": 0.0783, "theoretical_loss": 3.833912995326029, "tokens_seen": 654573568 }, { "epoch": 0.2, "learning_rate": 0.0008096613705665223, "loss": 0.0789, "theoretical_loss": 3.8337564339864914, "tokens_seen": 654835712 }, { "epoch": 0.2, "learning_rate": 0.0008095811266249399, "loss": 0.0798, "theoretical_loss": 3.8335999528499913, "tokens_seen": 655097856 }, { "debugging/Compilability": 0.9523809523809523, "debugging/distinct-1-grams": 0.7472213669987466, "debugging/entropy-1-grams": 5.482485850115401, "debugging/length": 435.9047619047619, "debugging/num_segments": 21, "debugging/raw_token_scores_avg": 0.0068328846246004105, "debugging/raw_token_scores_std": 0.01969163864850998, "debugging/score": 0.013429834759321626, "debugging/score_std": 0.01710154615280685, "epoch": 0.2, "objective/train/advantage_avg": 0.0014720445033162832, "objective/train/docs_used": 243976, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.65055251121521, "objective/train/original_loss": 1.65055251121521, "objective/train/theoretical_loss": 3.833443551843374, "objective/train/tokens_used": 675820000, "objective/train/value_avg": -0.00830841064453125, "objective/train/value_loss": 0.00020881656382698566, "objective/train/value_max": -0.00023055076599121094, "objective/train/value_min": -0.321533203125, "objective/train/value_reward_corr": 0.6838538864516404, "objective/train/value_std": 0.01381683349609375, "objective/train/weight_avg": 1.0015703439712524, "objective/train/weighted_lm_loss": 1.6532535552978516, "objective/train/weights_max": 1.1742674112319946, "objective/train/weights_min": 0.3706134557723999, "theoretical_loss": 3.833443551843374, "tokens_seen": 655360000 }, { "epoch": 0.2, "learning_rate": 0.0008095008826833574, "loss": 0.0823, "theoretical_loss": 3.833443551843374, "tokens_seen": 655360000 }, { "epoch": 0.2, "learning_rate": 0.000809420638741775, "loss": 0.0799, "theoretical_loss": 3.8332872308935793, "tokens_seen": 655622144 }, { "epoch": 0.2, "learning_rate": 0.0008093403948001926, "loss": 0.0809, "theoretical_loss": 3.833130989927643, "tokens_seen": 655884288 }, { "epoch": 0.2, "learning_rate": 0.0008092601508586101, "loss": 0.0794, "theoretical_loss": 3.8329748288726972, "tokens_seen": 656146432 }, { "epoch": 0.2, "learning_rate": 0.0008091799069170278, "loss": 0.0833, "theoretical_loss": 3.8328187476559687, "tokens_seen": 656408576 }, { "epoch": 0.2, "learning_rate": 0.0008090996629754453, "loss": 0.0818, "theoretical_loss": 3.83266274620478, "tokens_seen": 656670720 }, { "epoch": 0.2, "learning_rate": 0.000809019419033863, "loss": 0.0781, "theoretical_loss": 3.832506824446549, "tokens_seen": 656932864 }, { "epoch": 0.2, "learning_rate": 0.0008089391750922806, "loss": 0.0791, "theoretical_loss": 3.832350982308788, "tokens_seen": 657195008 }, { "epoch": 0.2, "learning_rate": 0.0008088589311506981, "loss": 0.0781, "theoretical_loss": 3.8321952197191043, "tokens_seen": 657457152 }, { "epoch": 0.2, "learning_rate": 0.0008087786872091157, "loss": 0.0813, "theoretical_loss": 3.8320395366052, "tokens_seen": 657719296 }, { "epoch": 0.2, "learning_rate": 0.0008086984432675333, "loss": 0.0791, "theoretical_loss": 3.8318839328948715, "tokens_seen": 657981440 }, { "epoch": 0.2, "learning_rate": 0.0008086181993259509, "loss": 0.0802, "theoretical_loss": 3.8317284085160095, "tokens_seen": 658243584 }, { "epoch": 0.2, "learning_rate": 0.0008085379553843684, "loss": 0.0802, "theoretical_loss": 3.8315729633965994, "tokens_seen": 658505728 }, { "epoch": 0.2, "learning_rate": 0.0008084577114427861, "loss": 0.0783, "theoretical_loss": 3.8314175974647195, "tokens_seen": 658767872 }, { "epoch": 0.2, "learning_rate": 0.0008083774675012036, "loss": 0.0803, "theoretical_loss": 3.831262310648544, "tokens_seen": 659030016 }, { "epoch": 0.2, "learning_rate": 0.0008082972235596214, "loss": 0.0813, "theoretical_loss": 3.831107102876338, "tokens_seen": 659292160 }, { "epoch": 0.2, "learning_rate": 0.0008082169796180389, "loss": 0.0825, "theoretical_loss": 3.830951974076463, "tokens_seen": 659554304 }, { "epoch": 0.2, "learning_rate": 0.0008081367356764564, "loss": 0.0821, "theoretical_loss": 3.830796924177371, "tokens_seen": 659816448 }, { "epoch": 0.2, "learning_rate": 0.0008080564917348741, "loss": 0.079, "theoretical_loss": 3.830641953107609, "tokens_seen": 660078592 }, { "epoch": 0.2, "learning_rate": 0.0008079762477932916, "loss": 0.0803, "theoretical_loss": 3.8304870607958175, "tokens_seen": 660340736 }, { "epoch": 0.2, "learning_rate": 0.0008078960038517092, "loss": 0.0804, "theoretical_loss": 3.8303322471707286, "tokens_seen": 660602880 }, { "epoch": 0.2, "learning_rate": 0.0008078157599101268, "loss": 0.0782, "theoretical_loss": 3.830177512161167, "tokens_seen": 660865024 }, { "epoch": 0.2, "learning_rate": 0.0008077355159685444, "loss": 0.0815, "theoretical_loss": 3.8300228556960523, "tokens_seen": 661127168 }, { "epoch": 0.2, "learning_rate": 0.0008076552720269619, "loss": 0.0774, "theoretical_loss": 3.829868277704393, "tokens_seen": 661389312 }, { "epoch": 0.2, "learning_rate": 0.0008075750280853796, "loss": 0.0806, "theoretical_loss": 3.829713778115293, "tokens_seen": 661651456 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.00017224658222403377, "objective/train/docs_used": 246530, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7009872198104858, "objective/train/original_loss": 1.7009871006011963, "objective/train/theoretical_loss": 3.8295593568579465, "objective/train/tokens_used": 682373600, "objective/train/value_avg": -0.01042938232421875, "objective/train/value_loss": 0.00039018821553327143, "objective/train/value_max": -0.0001634359359741211, "objective/train/value_min": -0.291748046875, "objective/train/value_reward_corr": 0.6474135841444115, "objective/train/value_std": 0.01617431640625, "objective/train/weight_avg": 1.0003470182418823, "objective/train/weighted_lm_loss": 1.7011173963546753, "objective/train/weights_max": 1.2405765056610107, "objective/train/weights_min": 0.3682652413845062, "theoretical_loss": 3.8295593568579465, "tokens_seen": 661913600 }, { "epoch": 0.2, "learning_rate": 0.0008074947841437972, "loss": 0.0814, "theoretical_loss": 3.8295593568579465, "tokens_seen": 661913600 }, { "epoch": 0.2, "learning_rate": 0.0008074145402022147, "loss": 0.0811, "theoretical_loss": 3.8294050138616402, "tokens_seen": 662175744 }, { "epoch": 0.2, "learning_rate": 0.0008073342962606324, "loss": 0.0806, "theoretical_loss": 3.8292507490557526, "tokens_seen": 662437888 }, { "epoch": 0.2, "learning_rate": 0.0008072540523190499, "loss": 0.0764, "theoretical_loss": 3.8290965623697537, "tokens_seen": 662700032 }, { "epoch": 0.2, "learning_rate": 0.0008071738083774676, "loss": 0.078, "theoretical_loss": 3.8289424537332053, "tokens_seen": 662962176 }, { "epoch": 0.2, "learning_rate": 0.0008070935644358851, "loss": 0.0825, "theoretical_loss": 3.82878842307576, "tokens_seen": 663224320 }, { "epoch": 0.2, "learning_rate": 0.0008070133204943026, "loss": 0.0794, "theoretical_loss": 3.828634470327162, "tokens_seen": 663486464 }, { "epoch": 0.2, "learning_rate": 0.0008069330765527203, "loss": 0.0805, "theoretical_loss": 3.8284805954172474, "tokens_seen": 663748608 }, { "epoch": 0.2, "learning_rate": 0.0008068528326111379, "loss": 0.081, "theoretical_loss": 3.828326798275941, "tokens_seen": 664010752 }, { "epoch": 0.2, "learning_rate": 0.0008067725886695555, "loss": 0.0793, "theoretical_loss": 3.82817307883326, "tokens_seen": 664272896 }, { "epoch": 0.2, "learning_rate": 0.0008066923447279731, "loss": 0.0795, "theoretical_loss": 3.8280194370193112, "tokens_seen": 664535040 }, { "epoch": 0.2, "learning_rate": 0.0008066121007863907, "loss": 0.0796, "theoretical_loss": 3.827865872764293, "tokens_seen": 664797184 }, { "epoch": 0.2, "learning_rate": 0.0008065318568448082, "loss": 0.0799, "theoretical_loss": 3.8277123859984936, "tokens_seen": 665059328 }, { "epoch": 0.2, "learning_rate": 0.0008064516129032258, "loss": 0.0811, "theoretical_loss": 3.8275589766522895, "tokens_seen": 665321472 }, { "epoch": 0.2, "learning_rate": 0.0008063713689616434, "loss": 0.0794, "theoretical_loss": 3.8274056446561504, "tokens_seen": 665583616 }, { "epoch": 0.2, "learning_rate": 0.0008062911250200609, "loss": 0.0768, "theoretical_loss": 3.827252389940633, "tokens_seen": 665845760 }, { "epoch": 0.2, "learning_rate": 0.0008062108810784786, "loss": 0.0787, "theoretical_loss": 3.827099212436386, "tokens_seen": 666107904 }, { "epoch": 0.2, "learning_rate": 0.0008061306371368961, "loss": 0.0801, "theoretical_loss": 3.826946112074145, "tokens_seen": 666370048 }, { "epoch": 0.2, "learning_rate": 0.0008060503931953138, "loss": 0.0789, "theoretical_loss": 3.826793088784737, "tokens_seen": 666632192 }, { "epoch": 0.2, "learning_rate": 0.0008059701492537314, "loss": 0.0816, "theoretical_loss": 3.826640142499077, "tokens_seen": 666894336 }, { "epoch": 0.2, "learning_rate": 0.0008058899053121489, "loss": 0.0782, "theoretical_loss": 3.8264872731481705, "tokens_seen": 667156480 }, { "epoch": 0.2, "learning_rate": 0.0008058096613705666, "loss": 0.0785, "theoretical_loss": 3.8263344806631103, "tokens_seen": 667418624 }, { "epoch": 0.2, "learning_rate": 0.0008057294174289841, "loss": 0.0802, "theoretical_loss": 3.8261817649750784, "tokens_seen": 667680768 }, { "epoch": 0.2, "learning_rate": 0.0008056491734874017, "loss": 0.0778, "theoretical_loss": 3.8260291260153463, "tokens_seen": 667942912 }, { "epoch": 0.2, "learning_rate": 0.0008055689295458193, "loss": 0.0796, "theoretical_loss": 3.8258765637152727, "tokens_seen": 668205056 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.0008796442998573184, "objective/train/docs_used": 248744, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6819145679473877, "objective/train/original_loss": 1.6819145679473877, "objective/train/theoretical_loss": 3.8257240780063055, "objective/train/tokens_used": 688927200, "objective/train/value_avg": -0.007282257080078125, "objective/train/value_loss": 0.00022768820053897798, "objective/train/value_max": -0.00021314620971679688, "objective/train/value_min": -0.29150390625, "objective/train/value_reward_corr": 0.70498133906636, "objective/train/value_std": 0.01126861572265625, "objective/train/weight_avg": 1.0009862184524536, "objective/train/weighted_lm_loss": 1.6842213869094849, "objective/train/weights_max": 1.2005013227462769, "objective/train/weights_min": 0.37098827958106995, "theoretical_loss": 3.8257240780063055, "tokens_seen": 668467200 }, { "epoch": 0.2, "learning_rate": 0.0008054886856042369, "loss": 0.0805, "theoretical_loss": 3.8257240780063055, "tokens_seen": 668467200 }, { "epoch": 0.2, "learning_rate": 0.0008054084416626544, "loss": 0.0779, "theoretical_loss": 3.8255716688199803, "tokens_seen": 668729344 }, { "epoch": 0.2, "learning_rate": 0.0008053281977210722, "loss": 0.0788, "theoretical_loss": 3.825419336087921, "tokens_seen": 668991488 }, { "epoch": 0.2, "learning_rate": 0.0008052479537794897, "loss": 0.0786, "theoretical_loss": 3.825267079741839, "tokens_seen": 669253632 }, { "epoch": 0.2, "learning_rate": 0.0008051677098379072, "loss": 0.0792, "theoretical_loss": 3.825114899713533, "tokens_seen": 669515776 }, { "epoch": 0.2, "learning_rate": 0.0008050874658963249, "loss": 0.0808, "theoretical_loss": 3.8249627959348915, "tokens_seen": 669777920 }, { "epoch": 0.2, "learning_rate": 0.0008050072219547424, "loss": 0.0788, "theoretical_loss": 3.824810768337887, "tokens_seen": 670040064 }, { "epoch": 0.2, "learning_rate": 0.00080492697801316, "loss": 0.0798, "theoretical_loss": 3.8246588168545816, "tokens_seen": 670302208 }, { "epoch": 0.2, "learning_rate": 0.0008048467340715776, "loss": 0.0745, "theoretical_loss": 3.824506941417125, "tokens_seen": 670564352 }, { "epoch": 0.2, "learning_rate": 0.0008047664901299952, "loss": 0.0809, "theoretical_loss": 3.824355141957752, "tokens_seen": 670826496 }, { "epoch": 0.2, "learning_rate": 0.0008046862461884128, "loss": 0.0798, "theoretical_loss": 3.8242034184087847, "tokens_seen": 671088640 }, { "epoch": 0.2, "learning_rate": 0.0008046060022468304, "loss": 0.0772, "theoretical_loss": 3.824051770702633, "tokens_seen": 671350784 }, { "epoch": 0.2, "learning_rate": 0.000804525758305248, "loss": 0.0819, "theoretical_loss": 3.823900198771792, "tokens_seen": 671612928 }, { "epoch": 0.2, "learning_rate": 0.0008044455143636656, "loss": 0.0765, "theoretical_loss": 3.823748702548845, "tokens_seen": 671875072 }, { "epoch": 0.2, "learning_rate": 0.0008043652704220832, "loss": 0.0772, "theoretical_loss": 3.823597281966459, "tokens_seen": 672137216 }, { "epoch": 0.2, "learning_rate": 0.0008042850264805007, "loss": 0.0785, "theoretical_loss": 3.8234459369573894, "tokens_seen": 672399360 }, { "epoch": 0.2, "learning_rate": 0.0008042047825389184, "loss": 0.0792, "theoretical_loss": 3.8232946674544763, "tokens_seen": 672661504 }, { "epoch": 0.2, "learning_rate": 0.0008041245385973359, "loss": 0.0802, "theoretical_loss": 3.8231434733906458, "tokens_seen": 672923648 }, { "epoch": 0.2, "learning_rate": 0.0008040442946557534, "loss": 0.0799, "theoretical_loss": 3.822992354698911, "tokens_seen": 673185792 }, { "epoch": 0.2, "learning_rate": 0.0008039640507141711, "loss": 0.0783, "theoretical_loss": 3.822841311312368, "tokens_seen": 673447936 }, { "epoch": 0.2, "learning_rate": 0.0008038838067725887, "loss": 0.0791, "theoretical_loss": 3.8226903431642008, "tokens_seen": 673710080 }, { "epoch": 0.2, "learning_rate": 0.0008038035628310063, "loss": 0.0789, "theoretical_loss": 3.8225394501876764, "tokens_seen": 673972224 }, { "epoch": 0.2, "learning_rate": 0.0008037233188894239, "loss": 0.0798, "theoretical_loss": 3.822388632316149, "tokens_seen": 674234368 }, { "epoch": 0.2, "learning_rate": 0.0008036430749478415, "loss": 0.0776, "theoretical_loss": 3.822237889483057, "tokens_seen": 674496512 }, { "epoch": 0.2, "learning_rate": 0.000803562831006259, "loss": 0.0791, "theoretical_loss": 3.822087221621923, "tokens_seen": 674758656 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.00036516584805212915, "objective/train/docs_used": 251058, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5383315086364746, "objective/train/original_loss": 1.5383315086364746, "objective/train/theoretical_loss": 3.821936628666355, "objective/train/tokens_used": 695480800, "objective/train/value_avg": -0.0069732666015625, "objective/train/value_loss": 0.00019810259982477874, "objective/train/value_max": -0.0002269744873046875, "objective/train/value_min": -0.9658203125, "objective/train/value_reward_corr": 0.6702206529549529, "objective/train/value_std": 0.0112762451171875, "objective/train/weight_avg": 1.0004541873931885, "objective/train/weighted_lm_loss": 1.5389385223388672, "objective/train/weights_max": 1.1621463298797607, "objective/train/weights_min": 0.23926687240600586, "theoretical_loss": 3.821936628666355, "tokens_seen": 675020800 }, { "epoch": 0.2, "learning_rate": 0.0008034825870646766, "loss": 0.075, "theoretical_loss": 3.821936628666355, "tokens_seen": 675020800 }, { "epoch": 0.2, "learning_rate": 0.0008034023431230942, "loss": 0.0775, "theoretical_loss": 3.821786110550045, "tokens_seen": 675282944 }, { "epoch": 0.2, "learning_rate": 0.0008033220991815118, "loss": 0.0766, "theoretical_loss": 3.8216356672067704, "tokens_seen": 675545088 }, { "epoch": 0.2, "learning_rate": 0.0008032418552399294, "loss": 0.0791, "theoretical_loss": 3.821485298570391, "tokens_seen": 675807232 }, { "epoch": 0.2, "learning_rate": 0.000803161611298347, "loss": 0.0771, "theoretical_loss": 3.8213350045748533, "tokens_seen": 676069376 }, { "epoch": 0.2, "learning_rate": 0.0008030813673567647, "loss": 0.0784, "theoretical_loss": 3.821184785154186, "tokens_seen": 676331520 }, { "epoch": 0.21, "learning_rate": 0.0008030011234151822, "loss": 0.0789, "theoretical_loss": 3.8210346402425013, "tokens_seen": 676593664 }, { "epoch": 0.21, "learning_rate": 0.0008029208794735997, "loss": 0.0787, "theoretical_loss": 3.820884569773997, "tokens_seen": 676855808 }, { "epoch": 0.21, "learning_rate": 0.0008028406355320174, "loss": 0.0768, "theoretical_loss": 3.8207345736829526, "tokens_seen": 677117952 }, { "epoch": 0.21, "learning_rate": 0.0008027603915904349, "loss": 0.0778, "theoretical_loss": 3.820584651903732, "tokens_seen": 677380096 }, { "epoch": 0.21, "learning_rate": 0.0008026801476488525, "loss": 0.0782, "theoretical_loss": 3.820434804370782, "tokens_seen": 677642240 }, { "epoch": 0.21, "learning_rate": 0.0008025999037072701, "loss": 0.0778, "theoretical_loss": 3.820285031018633, "tokens_seen": 677904384 }, { "epoch": 0.21, "learning_rate": 0.0008025196597656877, "loss": 0.0785, "theoretical_loss": 3.8201353317818985, "tokens_seen": 678166528 }, { "epoch": 0.21, "learning_rate": 0.0008024394158241052, "loss": 0.0764, "theoretical_loss": 3.819985706595274, "tokens_seen": 678428672 }, { "epoch": 0.21, "learning_rate": 0.0008023591718825229, "loss": 0.0815, "theoretical_loss": 3.8198361553935385, "tokens_seen": 678690816 }, { "epoch": 0.21, "learning_rate": 0.0008022789279409405, "loss": 0.0812, "theoretical_loss": 3.8196866781115526, "tokens_seen": 678952960 }, { "epoch": 0.21, "learning_rate": 0.000802198683999358, "loss": 0.0828, "theoretical_loss": 3.8195372746842615, "tokens_seen": 679215104 }, { "epoch": 0.21, "learning_rate": 0.0008021184400577757, "loss": 0.0766, "theoretical_loss": 3.8193879450466905, "tokens_seen": 679477248 }, { "epoch": 0.21, "learning_rate": 0.0008020381961161932, "loss": 0.0797, "theoretical_loss": 3.819238689133948, "tokens_seen": 679739392 }, { "epoch": 0.21, "learning_rate": 0.0008019579521746109, "loss": 0.0793, "theoretical_loss": 3.819089506881225, "tokens_seen": 680001536 }, { "epoch": 0.21, "learning_rate": 0.0008018777082330284, "loss": 0.0789, "theoretical_loss": 3.8189403982237935, "tokens_seen": 680263680 }, { "epoch": 0.21, "learning_rate": 0.000801797464291446, "loss": 0.0772, "theoretical_loss": 3.818791363097008, "tokens_seen": 680525824 }, { "epoch": 0.21, "learning_rate": 0.0008017172203498636, "loss": 0.0787, "theoretical_loss": 3.818642401436304, "tokens_seen": 680787968 }, { "epoch": 0.21, "learning_rate": 0.0008016369764082812, "loss": 0.0791, "theoretical_loss": 3.8184935131771987, "tokens_seen": 681050112 }, { "epoch": 0.21, "learning_rate": 0.0008015567324666988, "loss": 0.0784, "theoretical_loss": 3.8183446982552915, "tokens_seen": 681312256 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.0005019235541112721, "objective/train/docs_used": 253498, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.601072072982788, "objective/train/original_loss": 1.6010721921920776, "objective/train/theoretical_loss": 3.818195956606262, "objective/train/tokens_used": 702034400, "objective/train/value_avg": -0.006389617919921875, "objective/train/value_loss": 0.00024236510216724128, "objective/train/value_max": -0.0001366138458251953, "objective/train/value_min": -0.63427734375, "objective/train/value_reward_corr": 0.4653045941913805, "objective/train/value_std": 0.0096893310546875, "objective/train/weight_avg": 1.0006093978881836, "objective/train/weighted_lm_loss": 1.6022027730941772, "objective/train/weights_max": 1.7492560148239136, "objective/train/weights_min": 0.37081003189086914, "theoretical_loss": 3.818195956606262, "tokens_seen": 681574400 }, { "epoch": 0.21, "learning_rate": 0.0008014764885251164, "loss": 0.0804, "theoretical_loss": 3.818195956606262, "tokens_seen": 681574400 }, { "epoch": 0.21, "learning_rate": 0.000801396244583534, "loss": 0.0799, "theoretical_loss": 3.8180472881658707, "tokens_seen": 681836544 }, { "epoch": 0.21, "learning_rate": 0.0008013160006419515, "loss": 0.0805, "theoretical_loss": 3.817898692869961, "tokens_seen": 682098688 }, { "epoch": 0.21, "learning_rate": 0.0008012357567003692, "loss": 0.0809, "theoretical_loss": 3.817750170654455, "tokens_seen": 682360832 }, { "epoch": 0.21, "learning_rate": 0.0008011555127587867, "loss": 0.0793, "theoretical_loss": 3.8176017214553575, "tokens_seen": 682622976 }, { "epoch": 0.21, "learning_rate": 0.0008010752688172042, "loss": 0.0801, "theoretical_loss": 3.8174533452087513, "tokens_seen": 682885120 }, { "epoch": 0.21, "learning_rate": 0.0008009950248756219, "loss": 0.0771, "theoretical_loss": 3.817305041850802, "tokens_seen": 683147264 }, { "epoch": 0.21, "learning_rate": 0.0008009147809340394, "loss": 0.0804, "theoretical_loss": 3.817156811317754, "tokens_seen": 683409408 }, { "epoch": 0.21, "learning_rate": 0.0008008345369924572, "loss": 0.0799, "theoretical_loss": 3.8170086535459333, "tokens_seen": 683671552 }, { "epoch": 0.21, "learning_rate": 0.0008007542930508747, "loss": 0.0797, "theoretical_loss": 3.8168605684717454, "tokens_seen": 683933696 }, { "epoch": 0.21, "learning_rate": 0.0008006740491092923, "loss": 0.0796, "theoretical_loss": 3.816712556031675, "tokens_seen": 684195840 }, { "epoch": 0.21, "learning_rate": 0.0008005938051677099, "loss": 0.0788, "theoretical_loss": 3.816564616162287, "tokens_seen": 684457984 }, { "epoch": 0.21, "learning_rate": 0.0008005135612261274, "loss": 0.0776, "theoretical_loss": 3.8164167488002265, "tokens_seen": 684720128 }, { "epoch": 0.21, "learning_rate": 0.000800433317284545, "loss": 0.0795, "theoretical_loss": 3.816268953882218, "tokens_seen": 684982272 }, { "epoch": 0.21, "learning_rate": 0.0008003530733429626, "loss": 0.0775, "theoretical_loss": 3.8161212313450648, "tokens_seen": 685244416 }, { "epoch": 0.21, "learning_rate": 0.0008002728294013802, "loss": 0.079, "theoretical_loss": 3.81597358112565, "tokens_seen": 685506560 }, { "epoch": 0.21, "learning_rate": 0.0008001925854597977, "loss": 0.0796, "theoretical_loss": 3.815826003160935, "tokens_seen": 685768704 }, { "epoch": 0.21, "learning_rate": 0.0008001123415182155, "loss": 0.0809, "theoretical_loss": 3.815678497387962, "tokens_seen": 686030848 }, { "epoch": 0.21, "learning_rate": 0.000800032097576633, "loss": 0.0785, "theoretical_loss": 3.8155310637438506, "tokens_seen": 686292992 }, { "epoch": 0.21, "learning_rate": 0.0007999518536350505, "loss": 0.0787, "theoretical_loss": 3.8153837021657995, "tokens_seen": 686555136 }, { "epoch": 0.21, "learning_rate": 0.0007998716096934682, "loss": 0.0774, "theoretical_loss": 3.8152364125910863, "tokens_seen": 686817280 }, { "epoch": 0.21, "learning_rate": 0.0007997913657518857, "loss": 0.0779, "theoretical_loss": 3.8150891949570664, "tokens_seen": 687079424 }, { "epoch": 0.21, "learning_rate": 0.0007997111218103033, "loss": 0.078, "theoretical_loss": 3.814942049201175, "tokens_seen": 687341568 }, { "epoch": 0.21, "learning_rate": 0.0007996308778687209, "loss": 0.0812, "theoretical_loss": 3.8147949752609236, "tokens_seen": 687603712 }, { "epoch": 0.21, "learning_rate": 0.0007995506339271385, "loss": 0.0787, "theoretical_loss": 3.814647973073903, "tokens_seen": 687865856 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.0018313382752239704, "objective/train/docs_used": 255827, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5675907135009766, "objective/train/original_loss": 1.5675907135009766, "objective/train/theoretical_loss": 3.8145010425777826, "objective/train/tokens_used": 708588000, "objective/train/value_avg": -0.01007080078125, "objective/train/value_loss": 0.00023178788251243532, "objective/train/value_max": -0.0001499652862548828, "objective/train/value_min": -0.5947265625, "objective/train/value_reward_corr": 0.7783312114068648, "objective/train/value_std": 0.018707275390625, "objective/train/weight_avg": 1.0019445419311523, "objective/train/weighted_lm_loss": 1.5707687139511108, "objective/train/weights_max": 1.2117313146591187, "objective/train/weights_min": 0.6250301003456116, "theoretical_loss": 3.8145010425777826, "tokens_seen": 688128000 }, { "epoch": 0.21, "learning_rate": 0.0007994703899855562, "loss": 0.0784, "theoretical_loss": 3.8145010425777826, "tokens_seen": 688128000 }, { "epoch": 0.21, "learning_rate": 0.0007993901460439737, "loss": 0.0793, "theoretical_loss": 3.814354183710308, "tokens_seen": 688390144 }, { "epoch": 0.21, "learning_rate": 0.0007993099021023913, "loss": 0.0793, "theoretical_loss": 3.8142073964093046, "tokens_seen": 688652288 }, { "epoch": 0.21, "learning_rate": 0.0007992296581608089, "loss": 0.0781, "theoretical_loss": 3.8140606806126733, "tokens_seen": 688914432 }, { "epoch": 0.21, "learning_rate": 0.0007991494142192265, "loss": 0.0792, "theoretical_loss": 3.813914036258393, "tokens_seen": 689176576 }, { "epoch": 0.21, "learning_rate": 0.000799069170277644, "loss": 0.0786, "theoretical_loss": 3.813767463284522, "tokens_seen": 689438720 }, { "epoch": 0.21, "learning_rate": 0.0007989889263360617, "loss": 0.0793, "theoretical_loss": 3.8136209616291934, "tokens_seen": 689700864 }, { "epoch": 0.21, "learning_rate": 0.0007989086823944792, "loss": 0.0794, "theoretical_loss": 3.813474531230618, "tokens_seen": 689963008 }, { "epoch": 0.21, "learning_rate": 0.0007988284384528968, "loss": 0.0782, "theoretical_loss": 3.8133281720270835, "tokens_seen": 690225152 }, { "epoch": 0.21, "learning_rate": 0.0007987481945113144, "loss": 0.0802, "theoretical_loss": 3.8131818839569562, "tokens_seen": 690487296 }, { "epoch": 0.21, "learning_rate": 0.000798667950569732, "loss": 0.0804, "theoretical_loss": 3.8130356669586765, "tokens_seen": 690749440 }, { "epoch": 0.21, "learning_rate": 0.0007985877066281496, "loss": 0.0765, "theoretical_loss": 3.8128895209707627, "tokens_seen": 691011584 }, { "epoch": 0.21, "learning_rate": 0.0007985074626865672, "loss": 0.0817, "theoretical_loss": 3.81274344593181, "tokens_seen": 691273728 }, { "epoch": 0.21, "learning_rate": 0.0007984272187449848, "loss": 0.0773, "theoretical_loss": 3.8125974417804893, "tokens_seen": 691535872 }, { "epoch": 0.21, "learning_rate": 0.0007983469748034023, "loss": 0.0765, "theoretical_loss": 3.8124515084555477, "tokens_seen": 691798016 }, { "epoch": 0.21, "learning_rate": 0.00079826673086182, "loss": 0.0782, "theoretical_loss": 3.8123056458958087, "tokens_seen": 692060160 }, { "epoch": 0.21, "learning_rate": 0.0007981864869202375, "loss": 0.0776, "theoretical_loss": 3.812159854040172, "tokens_seen": 692322304 }, { "epoch": 0.21, "learning_rate": 0.0007981062429786551, "loss": 0.0793, "theoretical_loss": 3.8120141328276125, "tokens_seen": 692584448 }, { "epoch": 0.21, "learning_rate": 0.0007980259990370727, "loss": 0.0808, "theoretical_loss": 3.8118684821971813, "tokens_seen": 692846592 }, { "epoch": 0.21, "learning_rate": 0.0007979457550954902, "loss": 0.0803, "theoretical_loss": 3.8117229020880057, "tokens_seen": 693108736 }, { "epoch": 0.21, "learning_rate": 0.000797865511153908, "loss": 0.08, "theoretical_loss": 3.811577392439287, "tokens_seen": 693370880 }, { "epoch": 0.21, "learning_rate": 0.0007977852672123255, "loss": 0.08, "theoretical_loss": 3.8114319531903025, "tokens_seen": 693633024 }, { "epoch": 0.21, "learning_rate": 0.0007977050232707431, "loss": 0.0802, "theoretical_loss": 3.811286584280406, "tokens_seen": 693895168 }, { "epoch": 0.21, "learning_rate": 0.0007976247793291607, "loss": 0.0797, "theoretical_loss": 3.8111412856490245, "tokens_seen": 694157312 }, { "epoch": 0.21, "learning_rate": 0.0007975445353875782, "loss": 0.08, "theoretical_loss": 3.810996057235661, "tokens_seen": 694419456 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.001043860800564289, "objective/train/docs_used": 258341, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6668003797531128, "objective/train/original_loss": 1.6668004989624023, "objective/train/theoretical_loss": 3.810850898979894, "objective/train/tokens_used": 715141600, "objective/train/value_avg": -0.0081634521484375, "objective/train/value_loss": 0.00030596103169955313, "objective/train/value_max": -0.00021827220916748047, "objective/train/value_min": -0.765625, "objective/train/value_reward_corr": 0.5807898030559664, "objective/train/value_std": 0.01239776611328125, "objective/train/weight_avg": 1.001173973083496, "objective/train/weighted_lm_loss": 1.668200969696045, "objective/train/weights_max": 1.4868050813674927, "objective/train/weights_min": 0.22793063521385193, "theoretical_loss": 3.810850898979894, "tokens_seen": 694681600 }, { "epoch": 0.21, "learning_rate": 0.0007974642914459958, "loss": 0.0797, "theoretical_loss": 3.810850898979894, "tokens_seen": 694681600 }, { "epoch": 0.21, "learning_rate": 0.0007973840475044134, "loss": 0.0806, "theoretical_loss": 3.810705810821375, "tokens_seen": 694943744 }, { "epoch": 0.21, "learning_rate": 0.000797303803562831, "loss": 0.0794, "theoretical_loss": 3.8105607926998326, "tokens_seen": 695205888 }, { "epoch": 0.21, "learning_rate": 0.0007972235596212485, "loss": 0.0782, "theoretical_loss": 3.810415844555067, "tokens_seen": 695468032 }, { "epoch": 0.21, "learning_rate": 0.0007971433156796663, "loss": 0.0775, "theoretical_loss": 3.8102709663269554, "tokens_seen": 695730176 }, { "epoch": 0.21, "learning_rate": 0.0007970630717380838, "loss": 0.0788, "theoretical_loss": 3.810126157955448, "tokens_seen": 695992320 }, { "epoch": 0.21, "learning_rate": 0.0007969828277965014, "loss": 0.0777, "theoretical_loss": 3.809981419380569, "tokens_seen": 696254464 }, { "epoch": 0.21, "learning_rate": 0.000796902583854919, "loss": 0.0796, "theoretical_loss": 3.809836750542418, "tokens_seen": 696516608 }, { "epoch": 0.21, "learning_rate": 0.0007968223399133365, "loss": 0.08, "theoretical_loss": 3.8096921513811663, "tokens_seen": 696778752 }, { "epoch": 0.21, "learning_rate": 0.0007967420959717542, "loss": 0.08, "theoretical_loss": 3.809547621837061, "tokens_seen": 697040896 }, { "epoch": 0.21, "learning_rate": 0.0007966618520301717, "loss": 0.0763, "theoretical_loss": 3.809403161850423, "tokens_seen": 697303040 }, { "epoch": 0.21, "learning_rate": 0.0007965816080885893, "loss": 0.0822, "theoretical_loss": 3.8092587713616446, "tokens_seen": 697565184 }, { "epoch": 0.21, "learning_rate": 0.000796501364147007, "loss": 0.0808, "theoretical_loss": 3.809114450311193, "tokens_seen": 697827328 }, { "epoch": 0.21, "learning_rate": 0.0007964211202054245, "loss": 0.0796, "theoretical_loss": 3.808970198639609, "tokens_seen": 698089472 }, { "epoch": 0.21, "learning_rate": 0.0007963408762638421, "loss": 0.0778, "theoretical_loss": 3.808826016287507, "tokens_seen": 698351616 }, { "epoch": 0.21, "learning_rate": 0.0007962606323222597, "loss": 0.0802, "theoretical_loss": 3.8086819031955725, "tokens_seen": 698613760 }, { "epoch": 0.21, "learning_rate": 0.0007961803883806773, "loss": 0.0786, "theoretical_loss": 3.8085378593045665, "tokens_seen": 698875904 }, { "epoch": 0.21, "learning_rate": 0.0007961001444390948, "loss": 0.0816, "theoretical_loss": 3.8083938845553202, "tokens_seen": 699138048 }, { "epoch": 0.21, "learning_rate": 0.0007960199004975125, "loss": 0.0778, "theoretical_loss": 3.80824997888874, "tokens_seen": 699400192 }, { "epoch": 0.21, "learning_rate": 0.00079593965655593, "loss": 0.0807, "theoretical_loss": 3.8081061422458036, "tokens_seen": 699662336 }, { "epoch": 0.21, "learning_rate": 0.0007958594126143476, "loss": 0.0784, "theoretical_loss": 3.8079623745675613, "tokens_seen": 699924480 }, { "epoch": 0.21, "learning_rate": 0.0007957791686727652, "loss": 0.0752, "theoretical_loss": 3.8078186757951364, "tokens_seen": 700186624 }, { "epoch": 0.21, "learning_rate": 0.0007956989247311828, "loss": 0.0805, "theoretical_loss": 3.8076750458697237, "tokens_seen": 700448768 }, { "epoch": 0.21, "learning_rate": 0.0007956186807896005, "loss": 0.0788, "theoretical_loss": 3.807531484732591, "tokens_seen": 700710912 }, { "epoch": 0.21, "learning_rate": 0.000795538436848018, "loss": 0.0793, "theoretical_loss": 3.8073879923250775, "tokens_seen": 700973056 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.0004075298202224076, "objective/train/docs_used": 260260, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5826354026794434, "objective/train/original_loss": 1.5826352834701538, "objective/train/theoretical_loss": 3.8072445685885947, "objective/train/tokens_used": 721695200, "objective/train/value_avg": -0.00783538818359375, "objective/train/value_loss": 0.00022773307864554226, "objective/train/value_max": -0.00013136863708496094, "objective/train/value_min": -0.63134765625, "objective/train/value_reward_corr": 0.635312677935756, "objective/train/value_std": 0.01226806640625, "objective/train/weight_avg": 1.0005120038986206, "objective/train/weighted_lm_loss": 1.5832562446594238, "objective/train/weights_max": 1.4071308374404907, "objective/train/weights_min": 0.36930587887763977, "theoretical_loss": 3.8072445685885947, "tokens_seen": 701235200 }, { "epoch": 0.21, "learning_rate": 0.0007954581929064356, "loss": 0.0774, "theoretical_loss": 3.8072445685885947, "tokens_seen": 701235200 }, { "epoch": 0.21, "learning_rate": 0.0007953779489648532, "loss": 0.0784, "theoretical_loss": 3.807101213464625, "tokens_seen": 701497344 }, { "epoch": 0.21, "learning_rate": 0.0007952977050232708, "loss": 0.08, "theoretical_loss": 3.8069579268947242, "tokens_seen": 701759488 }, { "epoch": 0.21, "learning_rate": 0.0007952174610816883, "loss": 0.0812, "theoretical_loss": 3.806814708820519, "tokens_seen": 702021632 }, { "epoch": 0.21, "learning_rate": 0.0007951372171401059, "loss": 0.0758, "theoretical_loss": 3.806671559183706, "tokens_seen": 702283776 }, { "epoch": 0.21, "learning_rate": 0.0007950569731985235, "loss": 0.0778, "theoretical_loss": 3.806528477926056, "tokens_seen": 702545920 }, { "epoch": 0.21, "learning_rate": 0.000794976729256941, "loss": 0.081, "theoretical_loss": 3.806385464989409, "tokens_seen": 702808064 }, { "epoch": 0.21, "learning_rate": 0.0007948964853153588, "loss": 0.0806, "theoretical_loss": 3.806242520315676, "tokens_seen": 703070208 }, { "epoch": 0.21, "learning_rate": 0.0007948162413737763, "loss": 0.0789, "theoretical_loss": 3.806099643846841, "tokens_seen": 703332352 }, { "epoch": 0.21, "learning_rate": 0.0007947359974321939, "loss": 0.0789, "theoretical_loss": 3.8059568355249564, "tokens_seen": 703594496 }, { "epoch": 0.21, "learning_rate": 0.0007946557534906115, "loss": 0.0786, "theoretical_loss": 3.8058140952921478, "tokens_seen": 703856640 }, { "epoch": 0.21, "learning_rate": 0.000794575509549029, "loss": 0.0794, "theoretical_loss": 3.805671423090609, "tokens_seen": 704118784 }, { "epoch": 0.21, "learning_rate": 0.0007944952656074467, "loss": 0.0779, "theoretical_loss": 3.805528818862607, "tokens_seen": 704380928 }, { "epoch": 0.21, "learning_rate": 0.0007944150216658642, "loss": 0.0777, "theoretical_loss": 3.8053862825504776, "tokens_seen": 704643072 }, { "epoch": 0.21, "learning_rate": 0.0007943347777242818, "loss": 0.0805, "theoretical_loss": 3.8052438140966265, "tokens_seen": 704905216 }, { "epoch": 0.21, "learning_rate": 0.0007942545337826995, "loss": 0.08, "theoretical_loss": 3.8051014134435315, "tokens_seen": 705167360 }, { "epoch": 0.21, "learning_rate": 0.0007941742898411171, "loss": 0.0751, "theoretical_loss": 3.804959080533739, "tokens_seen": 705429504 }, { "epoch": 0.21, "learning_rate": 0.0007940940458995346, "loss": 0.0788, "theoretical_loss": 3.8048168153098656, "tokens_seen": 705691648 }, { "epoch": 0.21, "learning_rate": 0.0007940138019579522, "loss": 0.0784, "theoretical_loss": 3.8046746177145985, "tokens_seen": 705953792 }, { "epoch": 0.21, "learning_rate": 0.0007939335580163698, "loss": 0.0796, "theoretical_loss": 3.804532487690694, "tokens_seen": 706215936 }, { "epoch": 0.21, "learning_rate": 0.0007938533140747873, "loss": 0.078, "theoretical_loss": 3.8043904251809786, "tokens_seen": 706478080 }, { "epoch": 0.21, "learning_rate": 0.000793773070133205, "loss": 0.0793, "theoretical_loss": 3.8042484301283475, "tokens_seen": 706740224 }, { "epoch": 0.21, "learning_rate": 0.0007936928261916225, "loss": 0.0805, "theoretical_loss": 3.8041065024757668, "tokens_seen": 707002368 }, { "epoch": 0.21, "learning_rate": 0.0007936125822500401, "loss": 0.0788, "theoretical_loss": 3.8039646421662705, "tokens_seen": 707264512 }, { "epoch": 0.21, "learning_rate": 0.0007935323383084578, "loss": 0.078, "theoretical_loss": 3.8038228491429624, "tokens_seen": 707526656 }, { "epoch": 0.21, "objective/train/advantage_avg": -0.0002181452582590282, "objective/train/docs_used": 262675, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6911776065826416, "objective/train/original_loss": 1.6911776065826416, "objective/train/theoretical_loss": 3.8036811233490164, "objective/train/tokens_used": 728248800, "objective/train/value_avg": -0.0099334716796875, "objective/train/value_loss": 0.0003097241569776088, "objective/train/value_max": -0.00015723705291748047, "objective/train/value_min": -0.83837890625, "objective/train/value_reward_corr": 0.6940126679848521, "objective/train/value_std": 0.0159912109375, "objective/train/weight_avg": 0.9999237656593323, "objective/train/weighted_lm_loss": 1.6909995079040527, "objective/train/weights_max": 1.2376219034194946, "objective/train/weights_min": 0.3753642737865448, "theoretical_loss": 3.8036811233490164, "tokens_seen": 707788800 }, { "epoch": 0.21, "learning_rate": 0.0007934520943668753, "loss": 0.08, "theoretical_loss": 3.8036811233490164, "tokens_seen": 707788800 }, { "epoch": 0.21, "learning_rate": 0.0007933718504252929, "loss": 0.0775, "theoretical_loss": 3.803539464727673, "tokens_seen": 708050944 }, { "epoch": 0.21, "learning_rate": 0.0007932916064837105, "loss": 0.0812, "theoretical_loss": 3.803397873222244, "tokens_seen": 708313088 }, { "epoch": 0.21, "learning_rate": 0.0007932113625421281, "loss": 0.08, "theoretical_loss": 3.8032563487761095, "tokens_seen": 708575232 }, { "epoch": 0.21, "learning_rate": 0.0007931311186005457, "loss": 0.0772, "theoretical_loss": 3.8031148913327177, "tokens_seen": 708837376 }, { "epoch": 0.21, "learning_rate": 0.0007930508746589633, "loss": 0.0804, "theoretical_loss": 3.8029735008355843, "tokens_seen": 709099520 }, { "epoch": 0.21, "learning_rate": 0.0007929706307173808, "loss": 0.0799, "theoretical_loss": 3.8028321772282965, "tokens_seen": 709361664 }, { "epoch": 0.22, "learning_rate": 0.0007928903867757984, "loss": 0.0789, "theoretical_loss": 3.8026909204545065, "tokens_seen": 709623808 }, { "epoch": 0.22, "learning_rate": 0.000792810142834216, "loss": 0.0811, "theoretical_loss": 3.8025497304579376, "tokens_seen": 709885952 }, { "epoch": 0.22, "learning_rate": 0.0007927298988926336, "loss": 0.0782, "theoretical_loss": 3.80240860718238, "tokens_seen": 710148096 }, { "epoch": 0.22, "learning_rate": 0.0007926496549510513, "loss": 0.0827, "theoretical_loss": 3.802267550571691, "tokens_seen": 710410240 }, { "epoch": 0.22, "learning_rate": 0.0007925694110094688, "loss": 0.0818, "theoretical_loss": 3.802126560569798, "tokens_seen": 710672384 }, { "epoch": 0.22, "learning_rate": 0.0007924891670678864, "loss": 0.0812, "theoretical_loss": 3.801985637120694, "tokens_seen": 710934528 }, { "epoch": 0.22, "learning_rate": 0.000792408923126304, "loss": 0.0785, "theoretical_loss": 3.801844780168441, "tokens_seen": 711196672 }, { "epoch": 0.22, "learning_rate": 0.0007923286791847216, "loss": 0.0815, "theoretical_loss": 3.8017039896571685, "tokens_seen": 711458816 }, { "epoch": 0.22, "learning_rate": 0.0007922484352431391, "loss": 0.0784, "theoretical_loss": 3.8015632655310734, "tokens_seen": 711720960 }, { "epoch": 0.22, "learning_rate": 0.0007921681913015567, "loss": 0.08, "theoretical_loss": 3.8014226077344198, "tokens_seen": 711983104 }, { "epoch": 0.22, "learning_rate": 0.0007920879473599743, "loss": 0.0775, "theoretical_loss": 3.8012820162115393, "tokens_seen": 712245248 }, { "epoch": 0.22, "learning_rate": 0.0007920077034183918, "loss": 0.077, "theoretical_loss": 3.801141490906831, "tokens_seen": 712507392 }, { "epoch": 0.22, "learning_rate": 0.0007919274594768096, "loss": 0.0777, "theoretical_loss": 3.80100103176476, "tokens_seen": 712769536 }, { "epoch": 0.22, "learning_rate": 0.0007918472155352271, "loss": 0.0814, "theoretical_loss": 3.8008606387298594, "tokens_seen": 713031680 }, { "epoch": 0.22, "learning_rate": 0.0007917669715936448, "loss": 0.0764, "theoretical_loss": 3.80072031174673, "tokens_seen": 713293824 }, { "epoch": 0.22, "learning_rate": 0.0007916867276520623, "loss": 0.0802, "theoretical_loss": 3.800580050760036, "tokens_seen": 713555968 }, { "epoch": 0.22, "learning_rate": 0.0007916064837104798, "loss": 0.0763, "theoretical_loss": 3.800439855714512, "tokens_seen": 713818112 }, { "epoch": 0.22, "learning_rate": 0.0007915262397688975, "loss": 0.0798, "theoretical_loss": 3.8002997265549574, "tokens_seen": 714080256 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.0012540040770545602, "objective/train/docs_used": 265036, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5773979425430298, "objective/train/original_loss": 1.5773980617523193, "objective/train/theoretical_loss": 3.8001596632262387, "objective/train/tokens_used": 734802400, "objective/train/value_avg": -0.01009368896484375, "objective/train/value_loss": 0.00023382111976388842, "objective/train/value_max": -0.00015115737915039062, "objective/train/value_min": -0.39892578125, "objective/train/value_reward_corr": 0.6356383073725016, "objective/train/value_std": 0.01374053955078125, "objective/train/weight_avg": 1.001361608505249, "objective/train/weighted_lm_loss": 1.57895028591156, "objective/train/weights_max": 1.2423335313796997, "objective/train/weights_min": 0.3686169385910034, "theoretical_loss": 3.8001596632262387, "tokens_seen": 714342400 }, { "epoch": 0.22, "learning_rate": 0.000791445995827315, "loss": 0.0799, "theoretical_loss": 3.8001596632262387, "tokens_seen": 714342400 }, { "epoch": 0.22, "learning_rate": 0.0007913657518857326, "loss": 0.0776, "theoretical_loss": 3.8000196656732874, "tokens_seen": 714604544 }, { "epoch": 0.22, "learning_rate": 0.0007912855079441503, "loss": 0.0778, "theoretical_loss": 3.7998797338411032, "tokens_seen": 714866688 }, { "epoch": 0.22, "learning_rate": 0.0007912052640025679, "loss": 0.0794, "theoretical_loss": 3.7997398676747496, "tokens_seen": 715128832 }, { "epoch": 0.22, "learning_rate": 0.0007911250200609854, "loss": 0.0795, "theoretical_loss": 3.7996000671193593, "tokens_seen": 715390976 }, { "epoch": 0.22, "learning_rate": 0.000791044776119403, "loss": 0.0774, "theoretical_loss": 3.7994603321201277, "tokens_seen": 715653120 }, { "epoch": 0.22, "learning_rate": 0.0007909645321778206, "loss": 0.0785, "theoretical_loss": 3.7993206626223177, "tokens_seen": 715915264 }, { "epoch": 0.22, "learning_rate": 0.0007908842882362381, "loss": 0.0787, "theoretical_loss": 3.799181058571258, "tokens_seen": 716177408 }, { "epoch": 0.22, "learning_rate": 0.0007908040442946558, "loss": 0.0754, "theoretical_loss": 3.7990415199123424, "tokens_seen": 716439552 }, { "epoch": 0.22, "learning_rate": 0.0007907238003530733, "loss": 0.0804, "theoretical_loss": 3.79890204659103, "tokens_seen": 716701696 }, { "epoch": 0.22, "learning_rate": 0.000790643556411491, "loss": 0.0781, "theoretical_loss": 3.7987626385528466, "tokens_seen": 716963840 }, { "epoch": 0.22, "learning_rate": 0.0007905633124699085, "loss": 0.0815, "theoretical_loss": 3.798623295743382, "tokens_seen": 717225984 }, { "epoch": 0.22, "learning_rate": 0.000790483068528326, "loss": 0.0817, "theoretical_loss": 3.798484018108291, "tokens_seen": 717488128 }, { "epoch": 0.22, "learning_rate": 0.0007904028245867438, "loss": 0.078, "theoretical_loss": 3.7983448055932953, "tokens_seen": 717750272 }, { "epoch": 0.22, "learning_rate": 0.0007903225806451613, "loss": 0.0819, "theoretical_loss": 3.79820565814418, "tokens_seen": 718012416 }, { "epoch": 0.22, "learning_rate": 0.0007902423367035789, "loss": 0.0782, "theoretical_loss": 3.798066575706795, "tokens_seen": 718274560 }, { "epoch": 0.22, "learning_rate": 0.0007901620927619965, "loss": 0.0779, "theoretical_loss": 3.797927558227056, "tokens_seen": 718536704 }, { "epoch": 0.22, "learning_rate": 0.0007900818488204141, "loss": 0.077, "theoretical_loss": 3.7977886056509433, "tokens_seen": 718798848 }, { "epoch": 0.22, "learning_rate": 0.0007900016048788316, "loss": 0.078, "theoretical_loss": 3.797649717924502, "tokens_seen": 719060992 }, { "epoch": 0.22, "learning_rate": 0.0007899213609372492, "loss": 0.0789, "theoretical_loss": 3.797510894993839, "tokens_seen": 719323136 }, { "epoch": 0.22, "learning_rate": 0.0007898411169956668, "loss": 0.078, "theoretical_loss": 3.79737213680513, "tokens_seen": 719585280 }, { "epoch": 0.22, "learning_rate": 0.0007897608730540844, "loss": 0.0787, "theoretical_loss": 3.797233443304612, "tokens_seen": 719847424 }, { "epoch": 0.22, "learning_rate": 0.0007896806291125021, "loss": 0.0801, "theoretical_loss": 3.7970948144385868, "tokens_seen": 720109568 }, { "epoch": 0.22, "learning_rate": 0.0007896003851709196, "loss": 0.0761, "theoretical_loss": 3.796956250153421, "tokens_seen": 720371712 }, { "epoch": 0.22, "learning_rate": 0.0007895201412293372, "loss": 0.0796, "theoretical_loss": 3.796817750395544, "tokens_seen": 720633856 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.0021576914004981518, "objective/train/docs_used": 267624, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.453436255455017, "objective/train/original_loss": 1.4534361362457275, "objective/train/theoretical_loss": 3.7966793151114504, "objective/train/tokens_used": 741356000, "objective/train/value_avg": -0.00920867919921875, "objective/train/value_loss": 0.0003623893717303872, "objective/train/value_max": -0.0002148151397705078, "objective/train/value_min": -0.8974609375, "objective/train/value_reward_corr": 0.59584870633317, "objective/train/value_std": 0.0142364501953125, "objective/train/weight_avg": 1.0023139715194702, "objective/train/weighted_lm_loss": 1.456945538520813, "objective/train/weights_max": 1.6503322124481201, "objective/train/weights_min": 0.2744547724723816, "theoretical_loss": 3.7966793151114504, "tokens_seen": 720896000 }, { "epoch": 0.22, "learning_rate": 0.0007894398972877548, "loss": 0.0765, "theoretical_loss": 3.7966793151114504, "tokens_seen": 720896000 }, { "epoch": 0.22, "learning_rate": 0.0007893596533461724, "loss": 0.0766, "theoretical_loss": 3.796540944247698, "tokens_seen": 721158144 }, { "epoch": 0.22, "learning_rate": 0.00078927940940459, "loss": 0.0778, "theoretical_loss": 3.796402637750908, "tokens_seen": 721420288 }, { "epoch": 0.22, "learning_rate": 0.0007891991654630075, "loss": 0.0806, "theoretical_loss": 3.796264395567766, "tokens_seen": 721682432 }, { "epoch": 0.22, "learning_rate": 0.0007891189215214251, "loss": 0.0784, "theoretical_loss": 3.7961262176450195, "tokens_seen": 721944576 }, { "epoch": 0.22, "learning_rate": 0.0007890386775798428, "loss": 0.0797, "theoretical_loss": 3.795988103929482, "tokens_seen": 722206720 }, { "epoch": 0.22, "learning_rate": 0.0007889584336382604, "loss": 0.0758, "theoretical_loss": 3.7958500543680276, "tokens_seen": 722468864 }, { "epoch": 0.22, "learning_rate": 0.0007888781896966779, "loss": 0.0788, "theoretical_loss": 3.795712068907596, "tokens_seen": 722731008 }, { "epoch": 0.22, "learning_rate": 0.0007887979457550956, "loss": 0.0822, "theoretical_loss": 3.795574147495188, "tokens_seen": 722993152 }, { "epoch": 0.22, "learning_rate": 0.0007887177018135131, "loss": 0.0806, "theoretical_loss": 3.795436290077868, "tokens_seen": 723255296 }, { "epoch": 0.22, "learning_rate": 0.0007886374578719306, "loss": 0.0809, "theoretical_loss": 3.795298496602765, "tokens_seen": 723517440 }, { "epoch": 0.22, "learning_rate": 0.0007885572139303483, "loss": 0.0776, "theoretical_loss": 3.795160767017068, "tokens_seen": 723779584 }, { "epoch": 0.22, "learning_rate": 0.0007884769699887658, "loss": 0.0785, "theoretical_loss": 3.795023101268031, "tokens_seen": 724041728 }, { "epoch": 0.22, "learning_rate": 0.0007883967260471834, "loss": 0.0826, "theoretical_loss": 3.7948854993029695, "tokens_seen": 724303872 }, { "epoch": 0.22, "learning_rate": 0.000788316482105601, "loss": 0.0766, "theoretical_loss": 3.7947479610692616, "tokens_seen": 724566016 }, { "epoch": 0.22, "learning_rate": 0.0007882362381640187, "loss": 0.0783, "theoretical_loss": 3.794610486514348, "tokens_seen": 724828160 }, { "epoch": 0.22, "learning_rate": 0.0007881559942224363, "loss": 0.0788, "theoretical_loss": 3.7944730755857323, "tokens_seen": 725090304 }, { "epoch": 0.22, "learning_rate": 0.0007880757502808538, "loss": 0.0772, "theoretical_loss": 3.794335728230979, "tokens_seen": 725352448 }, { "epoch": 0.22, "learning_rate": 0.0007879955063392714, "loss": 0.0779, "theoretical_loss": 3.7941984443977157, "tokens_seen": 725614592 }, { "epoch": 0.22, "learning_rate": 0.000787915262397689, "loss": 0.0761, "theoretical_loss": 3.7940612240336327, "tokens_seen": 725876736 }, { "epoch": 0.22, "learning_rate": 0.0007878350184561066, "loss": 0.0785, "theoretical_loss": 3.793924067086481, "tokens_seen": 726138880 }, { "epoch": 0.22, "learning_rate": 0.0007877547745145241, "loss": 0.078, "theoretical_loss": 3.793786973504073, "tokens_seen": 726401024 }, { "epoch": 0.22, "learning_rate": 0.0007876745305729418, "loss": 0.077, "theoretical_loss": 3.7936499432342847, "tokens_seen": 726663168 }, { "epoch": 0.22, "learning_rate": 0.0007875942866313593, "loss": 0.0779, "theoretical_loss": 3.7935129762250526, "tokens_seen": 726925312 }, { "epoch": 0.22, "learning_rate": 0.0007875140426897769, "loss": 0.0811, "theoretical_loss": 3.7933760724243752, "tokens_seen": 727187456 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.001988545060157776, "objective/train/docs_used": 269904, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5989689826965332, "objective/train/original_loss": 1.5989689826965332, "objective/train/theoretical_loss": 3.7932392317803116, "objective/train/tokens_used": 747909600, "objective/train/value_avg": -0.0074920654296875, "objective/train/value_loss": 0.00017211709928233176, "objective/train/value_max": -0.0002397298812866211, "objective/train/value_min": -0.310302734375, "objective/train/value_reward_corr": 0.5813086882183613, "objective/train/value_std": 0.0099639892578125, "objective/train/weight_avg": 1.0020697116851807, "objective/train/weighted_lm_loss": 1.6021634340286255, "objective/train/weights_max": 1.3142492771148682, "objective/train/weights_min": 0.36833229660987854, "theoretical_loss": 3.7932392317803116, "tokens_seen": 727449600 }, { "epoch": 0.22, "learning_rate": 0.0007874337987481946, "loss": 0.0795, "theoretical_loss": 3.7932392317803116, "tokens_seen": 727449600 }, { "epoch": 0.22, "learning_rate": 0.0007873535548066121, "loss": 0.077, "theoretical_loss": 3.7931024542409837, "tokens_seen": 727711744 }, { "epoch": 0.22, "learning_rate": 0.0007872733108650297, "loss": 0.0782, "theoretical_loss": 3.7929657397545733, "tokens_seen": 727973888 }, { "epoch": 0.22, "learning_rate": 0.0007871930669234473, "loss": 0.0766, "theoretical_loss": 3.792829088269324, "tokens_seen": 728236032 }, { "epoch": 0.22, "learning_rate": 0.0007871128229818649, "loss": 0.0768, "theoretical_loss": 3.792692499733541, "tokens_seen": 728498176 }, { "epoch": 0.22, "learning_rate": 0.0007870325790402824, "loss": 0.0784, "theoretical_loss": 3.7925559740955896, "tokens_seen": 728760320 }, { "epoch": 0.22, "learning_rate": 0.0007869523350987, "loss": 0.0775, "theoretical_loss": 3.7924195113038968, "tokens_seen": 729022464 }, { "epoch": 0.22, "learning_rate": 0.0007868720911571176, "loss": 0.0786, "theoretical_loss": 3.7922831113069493, "tokens_seen": 729284608 }, { "epoch": 0.22, "learning_rate": 0.0007867918472155353, "loss": 0.0761, "theoretical_loss": 3.792146774053296, "tokens_seen": 729546752 }, { "epoch": 0.22, "learning_rate": 0.0007867116032739529, "loss": 0.0767, "theoretical_loss": 3.792010499491545, "tokens_seen": 729808896 }, { "epoch": 0.22, "learning_rate": 0.0007866313593323704, "loss": 0.0752, "theoretical_loss": 3.7918742875703657, "tokens_seen": 730071040 }, { "epoch": 0.22, "learning_rate": 0.0007865511153907881, "loss": 0.0762, "theoretical_loss": 3.7917381382384883, "tokens_seen": 730333184 }, { "epoch": 0.22, "learning_rate": 0.0007864708714492056, "loss": 0.0767, "theoretical_loss": 3.791602051444703, "tokens_seen": 730595328 }, { "epoch": 0.22, "learning_rate": 0.0007863906275076232, "loss": 0.075, "theoretical_loss": 3.791466027137859, "tokens_seen": 730857472 }, { "epoch": 0.22, "learning_rate": 0.0007863103835660408, "loss": 0.0781, "theoretical_loss": 3.7913300652668678, "tokens_seen": 731119616 }, { "epoch": 0.22, "learning_rate": 0.0007862301396244583, "loss": 0.0776, "theoretical_loss": 3.7911941657807002, "tokens_seen": 731381760 }, { "epoch": 0.22, "learning_rate": 0.0007861498956828759, "loss": 0.0752, "theoretical_loss": 3.7910583286283854, "tokens_seen": 731643904 }, { "epoch": 0.22, "learning_rate": 0.0007860696517412936, "loss": 0.0783, "theoretical_loss": 3.7909225537590157, "tokens_seen": 731906048 }, { "epoch": 0.22, "learning_rate": 0.0007859894077997112, "loss": 0.0766, "theoretical_loss": 3.790786841121739, "tokens_seen": 732168192 }, { "epoch": 0.22, "learning_rate": 0.0007859091638581287, "loss": 0.0765, "theoretical_loss": 3.7906511906657676, "tokens_seen": 732430336 }, { "epoch": 0.22, "learning_rate": 0.0007858289199165464, "loss": 0.0764, "theoretical_loss": 3.7905156023403697, "tokens_seen": 732692480 }, { "epoch": 0.22, "learning_rate": 0.0007857486759749639, "loss": 0.0781, "theoretical_loss": 3.7903800760948743, "tokens_seen": 732954624 }, { "epoch": 0.22, "learning_rate": 0.0007856684320333814, "loss": 0.0776, "theoretical_loss": 3.790244611878671, "tokens_seen": 733216768 }, { "epoch": 0.22, "learning_rate": 0.0007855881880917991, "loss": 0.0778, "theoretical_loss": 3.790109209641206, "tokens_seen": 733478912 }, { "epoch": 0.22, "learning_rate": 0.0007855079441502166, "loss": 0.0768, "theoretical_loss": 3.7899738693319875, "tokens_seen": 733741056 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.001371144549921155, "objective/train/docs_used": 272255, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6625961065292358, "objective/train/original_loss": 1.6625961065292358, "objective/train/theoretical_loss": 3.7898385909005814, "objective/train/tokens_used": 754463200, "objective/train/value_avg": -0.007289886474609375, "objective/train/value_loss": 0.00022098013141658157, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.287353515625, "objective/train/value_reward_corr": 0.5914789303006973, "objective/train/value_std": 0.0107879638671875, "objective/train/weight_avg": 1.0014703273773193, "objective/train/weighted_lm_loss": 1.6649521589279175, "objective/train/weights_max": 1.150571584701538, "objective/train/weights_min": 0.3686802387237549, "theoretical_loss": 3.7898385909005814, "tokens_seen": 734003200 }, { "epoch": 0.22, "learning_rate": 0.0007854277002086343, "loss": 0.08, "theoretical_loss": 3.7898385909005814, "tokens_seen": 734003200 }, { "epoch": 0.22, "learning_rate": 0.0007853474562670519, "loss": 0.0773, "theoretical_loss": 3.7897033742966135, "tokens_seen": 734265344 }, { "epoch": 0.22, "learning_rate": 0.0007852672123254695, "loss": 0.0774, "theoretical_loss": 3.789568219469767, "tokens_seen": 734527488 }, { "epoch": 0.22, "learning_rate": 0.0007851869683838871, "loss": 0.0798, "theoretical_loss": 3.789433126369786, "tokens_seen": 734789632 }, { "epoch": 0.22, "learning_rate": 0.0007851067244423046, "loss": 0.0765, "theoretical_loss": 3.7892980949464716, "tokens_seen": 735051776 }, { "epoch": 0.22, "learning_rate": 0.0007850264805007222, "loss": 0.0779, "theoretical_loss": 3.7891631251496856, "tokens_seen": 735313920 }, { "epoch": 0.22, "learning_rate": 0.0007849462365591398, "loss": 0.0772, "theoretical_loss": 3.7890282169293465, "tokens_seen": 735576064 }, { "epoch": 0.22, "learning_rate": 0.0007848659926175574, "loss": 0.0762, "theoretical_loss": 3.7888933702354324, "tokens_seen": 735838208 }, { "epoch": 0.22, "learning_rate": 0.0007847857486759749, "loss": 0.0792, "theoretical_loss": 3.7887585850179786, "tokens_seen": 736100352 }, { "epoch": 0.22, "learning_rate": 0.0007847055047343926, "loss": 0.0784, "theoretical_loss": 3.788623861227081, "tokens_seen": 736362496 }, { "epoch": 0.22, "learning_rate": 0.0007846252607928101, "loss": 0.0751, "theoretical_loss": 3.7884891988128926, "tokens_seen": 736624640 }, { "epoch": 0.22, "learning_rate": 0.0007845450168512277, "loss": 0.0778, "theoretical_loss": 3.7883545977256228, "tokens_seen": 736886784 }, { "epoch": 0.22, "learning_rate": 0.0007844647729096454, "loss": 0.0781, "theoretical_loss": 3.7882200579155416, "tokens_seen": 737148928 }, { "epoch": 0.22, "learning_rate": 0.0007843845289680629, "loss": 0.0788, "theoretical_loss": 3.788085579332977, "tokens_seen": 737411072 }, { "epoch": 0.22, "learning_rate": 0.0007843042850264806, "loss": 0.0772, "theoretical_loss": 3.787951161928312, "tokens_seen": 737673216 }, { "epoch": 0.22, "learning_rate": 0.0007842240410848981, "loss": 0.0782, "theoretical_loss": 3.7878168056519916, "tokens_seen": 737935360 }, { "epoch": 0.22, "learning_rate": 0.0007841437971433157, "loss": 0.0766, "theoretical_loss": 3.787682510454515, "tokens_seen": 738197504 }, { "epoch": 0.22, "learning_rate": 0.0007840635532017333, "loss": 0.0777, "theoretical_loss": 3.7875482762864405, "tokens_seen": 738459648 }, { "epoch": 0.22, "learning_rate": 0.0007839833092601508, "loss": 0.0757, "theoretical_loss": 3.787414103098384, "tokens_seen": 738721792 }, { "epoch": 0.22, "learning_rate": 0.0007839030653185684, "loss": 0.0765, "theoretical_loss": 3.7872799908410193, "tokens_seen": 738983936 }, { "epoch": 0.22, "learning_rate": 0.0007838228213769861, "loss": 0.0761, "theoretical_loss": 3.787145939465076, "tokens_seen": 739246080 }, { "epoch": 0.22, "learning_rate": 0.0007837425774354037, "loss": 0.0771, "theoretical_loss": 3.7870119489213425, "tokens_seen": 739508224 }, { "epoch": 0.22, "learning_rate": 0.0007836623334938212, "loss": 0.0794, "theoretical_loss": 3.786878019160664, "tokens_seen": 739770368 }, { "epoch": 0.22, "learning_rate": 0.0007835820895522389, "loss": 0.0761, "theoretical_loss": 3.7867441501339427, "tokens_seen": 740032512 }, { "epoch": 0.22, "learning_rate": 0.0007835018456106564, "loss": 0.0781, "theoretical_loss": 3.7866103417921373, "tokens_seen": 740294656 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.0015989760868251324, "objective/train/docs_used": 274540, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6382532119750977, "objective/train/original_loss": 1.6382529735565186, "objective/train/theoretical_loss": 3.786476594086265, "objective/train/tokens_used": 761016800, "objective/train/value_avg": -0.0094757080078125, "objective/train/value_loss": 0.0003717961080837995, "objective/train/value_max": -0.00016605854034423828, "objective/train/value_min": -0.59716796875, "objective/train/value_reward_corr": 0.6324069891531287, "objective/train/value_std": 0.0156707763671875, "objective/train/weight_avg": 1.0017673969268799, "objective/train/weighted_lm_loss": 1.6408437490463257, "objective/train/weights_max": 1.6781408786773682, "objective/train/weights_min": 0.37242892384529114, "theoretical_loss": 3.786476594086265, "tokens_seen": 740556800 }, { "epoch": 0.22, "learning_rate": 0.0007834216016690739, "loss": 0.0818, "theoretical_loss": 3.786476594086265, "tokens_seen": 740556800 }, { "epoch": 0.22, "learning_rate": 0.0007833413577274916, "loss": 0.076, "theoretical_loss": 3.7863429069673984, "tokens_seen": 740818944 }, { "epoch": 0.22, "learning_rate": 0.0007832611137859091, "loss": 0.0773, "theoretical_loss": 3.7862092803866663, "tokens_seen": 741081088 }, { "epoch": 0.22, "learning_rate": 0.0007831808698443267, "loss": 0.0743, "theoretical_loss": 3.786075714295257, "tokens_seen": 741343232 }, { "epoch": 0.22, "learning_rate": 0.0007831006259027444, "loss": 0.0797, "theoretical_loss": 3.7859422086444123, "tokens_seen": 741605376 }, { "epoch": 0.22, "learning_rate": 0.000783020381961162, "loss": 0.0777, "theoretical_loss": 3.7858087633854325, "tokens_seen": 741867520 }, { "epoch": 0.22, "learning_rate": 0.0007829401380195796, "loss": 0.077, "theoretical_loss": 3.785675378469673, "tokens_seen": 742129664 }, { "epoch": 0.22, "learning_rate": 0.0007828598940779972, "loss": 0.0771, "theoretical_loss": 3.7855420538485474, "tokens_seen": 742391808 }, { "epoch": 0.23, "learning_rate": 0.0007827796501364147, "loss": 0.0786, "theoretical_loss": 3.7854087894735233, "tokens_seen": 742653952 }, { "epoch": 0.23, "learning_rate": 0.0007826994061948323, "loss": 0.0749, "theoretical_loss": 3.7852755852961257, "tokens_seen": 742916096 }, { "epoch": 0.23, "learning_rate": 0.0007826191622532499, "loss": 0.0789, "theoretical_loss": 3.785142441267936, "tokens_seen": 743178240 }, { "epoch": 0.23, "learning_rate": 0.0007825389183116674, "loss": 0.0776, "theoretical_loss": 3.7850093573405905, "tokens_seen": 743440384 }, { "epoch": 0.23, "learning_rate": 0.0007824586743700851, "loss": 0.076, "theoretical_loss": 3.7848763334657827, "tokens_seen": 743702528 }, { "epoch": 0.23, "learning_rate": 0.0007823784304285027, "loss": 0.0805, "theoretical_loss": 3.7847433695952617, "tokens_seen": 743964672 }, { "epoch": 0.23, "learning_rate": 0.0007822981864869203, "loss": 0.0783, "theoretical_loss": 3.7846104656808306, "tokens_seen": 744226816 }, { "epoch": 0.23, "learning_rate": 0.0007822179425453379, "loss": 0.0757, "theoretical_loss": 3.7844776216743505, "tokens_seen": 744488960 }, { "epoch": 0.23, "learning_rate": 0.0007821376986037554, "loss": 0.077, "theoretical_loss": 3.784344837527737, "tokens_seen": 744751104 }, { "epoch": 0.23, "learning_rate": 0.000782057454662173, "loss": 0.0759, "theoretical_loss": 3.784212113192961, "tokens_seen": 745013248 }, { "epoch": 0.23, "learning_rate": 0.0007819772107205906, "loss": 0.0773, "theoretical_loss": 3.7840794486220495, "tokens_seen": 745275392 }, { "epoch": 0.23, "learning_rate": 0.0007818969667790082, "loss": 0.0777, "theoretical_loss": 3.783946843767084, "tokens_seen": 745537536 }, { "epoch": 0.23, "learning_rate": 0.0007818167228374257, "loss": 0.0785, "theoretical_loss": 3.783814298580203, "tokens_seen": 745799680 }, { "epoch": 0.23, "learning_rate": 0.0007817364788958434, "loss": 0.0772, "theoretical_loss": 3.7836818130135974, "tokens_seen": 746061824 }, { "epoch": 0.23, "learning_rate": 0.000781656234954261, "loss": 0.0756, "theoretical_loss": 3.783549387019515, "tokens_seen": 746323968 }, { "epoch": 0.23, "learning_rate": 0.0007815759910126786, "loss": 0.0779, "theoretical_loss": 3.7834170205502584, "tokens_seen": 746586112 }, { "epoch": 0.23, "learning_rate": 0.0007814957470710962, "loss": 0.0764, "theoretical_loss": 3.783284713558186, "tokens_seen": 746848256 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.001808889559470117, "objective/train/docs_used": 277055, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6275434494018555, "objective/train/original_loss": 1.627543330192566, "objective/train/theoretical_loss": 3.783152465995708, "objective/train/tokens_used": 767570400, "objective/train/value_avg": -0.01079559326171875, "objective/train/value_loss": 0.0003174249141011387, "objective/train/value_max": -0.00019109249114990234, "objective/train/value_min": -0.4189453125, "objective/train/value_reward_corr": 0.6689077552958134, "objective/train/value_std": 0.01580810546875, "objective/train/weight_avg": 1.001956582069397, "objective/train/weighted_lm_loss": 1.6307531595230103, "objective/train/weights_max": 1.1777127981185913, "objective/train/weights_min": 0.3695334792137146, "theoretical_loss": 3.783152465995708, "tokens_seen": 747110400 }, { "epoch": 0.23, "learning_rate": 0.0007814155031295137, "loss": 0.079, "theoretical_loss": 3.783152465995708, "tokens_seen": 747110400 }, { "epoch": 0.23, "learning_rate": 0.0007813352591879314, "loss": 0.077, "theoretical_loss": 3.7830202778152935, "tokens_seen": 747372544 }, { "epoch": 0.23, "learning_rate": 0.0007812550152463489, "loss": 0.0748, "theoretical_loss": 3.7828881489694632, "tokens_seen": 747634688 }, { "epoch": 0.23, "learning_rate": 0.0007811747713047665, "loss": 0.0756, "theoretical_loss": 3.7827560794107926, "tokens_seen": 747896832 }, { "epoch": 0.23, "learning_rate": 0.0007810945273631841, "loss": 0.0765, "theoretical_loss": 3.7826240690919137, "tokens_seen": 748158976 }, { "epoch": 0.23, "learning_rate": 0.0007810142834216016, "loss": 0.0792, "theoretical_loss": 3.7824921179655115, "tokens_seen": 748421120 }, { "epoch": 0.23, "learning_rate": 0.0007809340394800192, "loss": 0.0773, "theoretical_loss": 3.782360225984325, "tokens_seen": 748683264 }, { "epoch": 0.23, "learning_rate": 0.0007808537955384369, "loss": 0.0769, "theoretical_loss": 3.782228393101149, "tokens_seen": 748945408 }, { "epoch": 0.23, "learning_rate": 0.0007807735515968545, "loss": 0.0775, "theoretical_loss": 3.78209661926883, "tokens_seen": 749207552 }, { "epoch": 0.23, "learning_rate": 0.000780693307655272, "loss": 0.0771, "theoretical_loss": 3.781964904440271, "tokens_seen": 749469696 }, { "epoch": 0.23, "learning_rate": 0.0007806130637136897, "loss": 0.0795, "theoretical_loss": 3.7818332485684283, "tokens_seen": 749731840 }, { "epoch": 0.23, "learning_rate": 0.0007805328197721072, "loss": 0.0772, "theoretical_loss": 3.781701651606311, "tokens_seen": 749993984 }, { "epoch": 0.23, "learning_rate": 0.0007804525758305248, "loss": 0.0793, "theoretical_loss": 3.7815701135069846, "tokens_seen": 750256128 }, { "epoch": 0.23, "learning_rate": 0.0007803723318889424, "loss": 0.0765, "theoretical_loss": 3.7814386342235653, "tokens_seen": 750518272 }, { "epoch": 0.23, "learning_rate": 0.0007802920879473599, "loss": 0.0726, "theoretical_loss": 3.7813072137092254, "tokens_seen": 750780416 }, { "epoch": 0.23, "learning_rate": 0.0007802118440057776, "loss": 0.0749, "theoretical_loss": 3.7811758519171894, "tokens_seen": 751042560 }, { "epoch": 0.23, "learning_rate": 0.0007801316000641952, "loss": 0.0788, "theoretical_loss": 3.781044548800736, "tokens_seen": 751304704 }, { "epoch": 0.23, "learning_rate": 0.0007800513561226128, "loss": 0.0794, "theoretical_loss": 3.7809133043131973, "tokens_seen": 751566848 }, { "epoch": 0.23, "learning_rate": 0.0007799711121810304, "loss": 0.0784, "theoretical_loss": 3.7807821184079584, "tokens_seen": 751828992 }, { "epoch": 0.23, "learning_rate": 0.000779890868239448, "loss": 0.0775, "theoretical_loss": 3.780650991038459, "tokens_seen": 752091136 }, { "epoch": 0.23, "learning_rate": 0.0007798106242978655, "loss": 0.0773, "theoretical_loss": 3.7805199221581893, "tokens_seen": 752353280 }, { "epoch": 0.23, "learning_rate": 0.0007797303803562831, "loss": 0.0796, "theoretical_loss": 3.7803889117206957, "tokens_seen": 752615424 }, { "epoch": 0.23, "learning_rate": 0.0007796501364147007, "loss": 0.0788, "theoretical_loss": 3.7802579596795756, "tokens_seen": 752877568 }, { "epoch": 0.23, "learning_rate": 0.0007795698924731182, "loss": 0.0753, "theoretical_loss": 3.7801270659884807, "tokens_seen": 753139712 }, { "epoch": 0.23, "learning_rate": 0.0007794896485315359, "loss": 0.0779, "theoretical_loss": 3.7799962306011143, "tokens_seen": 753401856 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.000318468373734504, "objective/train/docs_used": 279492, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5279823541641235, "objective/train/original_loss": 1.527982473373413, "objective/train/theoretical_loss": 3.779865453471234, "objective/train/tokens_used": 774124000, "objective/train/value_avg": -0.009857177734375, "objective/train/value_loss": 0.0003417794650886208, "objective/train/value_max": -0.0001767873764038086, "objective/train/value_min": -0.51904296875, "objective/train/value_reward_corr": 0.737366687695921, "objective/train/value_std": 0.01702880859375, "objective/train/weight_avg": 1.0004727840423584, "objective/train/weighted_lm_loss": 1.528694748878479, "objective/train/weights_max": 1.1768189668655396, "objective/train/weights_min": 0.3686802387237549, "theoretical_loss": 3.779865453471234, "tokens_seen": 753664000 }, { "epoch": 0.23, "learning_rate": 0.0007794094045899534, "loss": 0.079, "theoretical_loss": 3.779865453471234, "tokens_seen": 753664000 }, { "epoch": 0.23, "learning_rate": 0.0007793291606483711, "loss": 0.0776, "theoretical_loss": 3.7797347345526484, "tokens_seen": 753926144 }, { "epoch": 0.23, "learning_rate": 0.0007792489167067887, "loss": 0.0775, "theoretical_loss": 3.7796040737992205, "tokens_seen": 754188288 }, { "epoch": 0.23, "learning_rate": 0.0007791686727652062, "loss": 0.0782, "theoretical_loss": 3.7794734711648648, "tokens_seen": 754450432 }, { "epoch": 0.23, "learning_rate": 0.0007790884288236239, "loss": 0.0791, "theoretical_loss": 3.779342926603549, "tokens_seen": 754712576 }, { "epoch": 0.23, "learning_rate": 0.0007790081848820414, "loss": 0.0767, "theoretical_loss": 3.7792124400692924, "tokens_seen": 754974720 }, { "epoch": 0.23, "learning_rate": 0.000778927940940459, "loss": 0.0761, "theoretical_loss": 3.7790820115161674, "tokens_seen": 755236864 }, { "epoch": 0.23, "learning_rate": 0.0007788476969988766, "loss": 0.079, "theoretical_loss": 3.778951640898298, "tokens_seen": 755499008 }, { "epoch": 0.23, "learning_rate": 0.0007787674530572942, "loss": 0.0779, "theoretical_loss": 3.7788213281698617, "tokens_seen": 755761152 }, { "epoch": 0.23, "learning_rate": 0.0007786872091157117, "loss": 0.0758, "theoretical_loss": 3.778691073285086, "tokens_seen": 756023296 }, { "epoch": 0.23, "learning_rate": 0.0007786069651741294, "loss": 0.0796, "theoretical_loss": 3.7785608761982523, "tokens_seen": 756285440 }, { "epoch": 0.23, "learning_rate": 0.000778526721232547, "loss": 0.0763, "theoretical_loss": 3.7784307368636934, "tokens_seen": 756547584 }, { "epoch": 0.23, "learning_rate": 0.0007784464772909645, "loss": 0.0754, "theoretical_loss": 3.7783006552357934, "tokens_seen": 756809728 }, { "epoch": 0.23, "learning_rate": 0.0007783662333493822, "loss": 0.0791, "theoretical_loss": 3.7781706312689893, "tokens_seen": 757071872 }, { "epoch": 0.23, "learning_rate": 0.0007782859894077997, "loss": 0.0778, "theoretical_loss": 3.7780406649177696, "tokens_seen": 757334016 }, { "epoch": 0.23, "learning_rate": 0.0007782057454662173, "loss": 0.0771, "theoretical_loss": 3.777910756136673, "tokens_seen": 757596160 }, { "epoch": 0.23, "learning_rate": 0.0007781255015246349, "loss": 0.0779, "theoretical_loss": 3.777780904880292, "tokens_seen": 757858304 }, { "epoch": 0.23, "learning_rate": 0.0007780452575830524, "loss": 0.0791, "theoretical_loss": 3.777651111103269, "tokens_seen": 758120448 }, { "epoch": 0.23, "learning_rate": 0.0007779650136414702, "loss": 0.0767, "theoretical_loss": 3.777521374760298, "tokens_seen": 758382592 }, { "epoch": 0.23, "learning_rate": 0.0007778847696998877, "loss": 0.0794, "theoretical_loss": 3.7773916958061253, "tokens_seen": 758644736 }, { "epoch": 0.23, "learning_rate": 0.0007778045257583053, "loss": 0.081, "theoretical_loss": 3.777262074195548, "tokens_seen": 758906880 }, { "epoch": 0.23, "learning_rate": 0.0007777242818167229, "loss": 0.079, "theoretical_loss": 3.777132509883413, "tokens_seen": 759169024 }, { "epoch": 0.23, "learning_rate": 0.0007776440378751405, "loss": 0.0786, "theoretical_loss": 3.7770030028246215, "tokens_seen": 759431168 }, { "epoch": 0.23, "learning_rate": 0.000777563793933558, "loss": 0.0776, "theoretical_loss": 3.7768735529741226, "tokens_seen": 759693312 }, { "epoch": 0.23, "learning_rate": 0.0007774835499919756, "loss": 0.0756, "theoretical_loss": 3.776744160286918, "tokens_seen": 759955456 }, { "epoch": 0.23, "objective/train/advantage_avg": -0.00012553480337373912, "objective/train/docs_used": 281753, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.536434531211853, "objective/train/original_loss": 1.5364346504211426, "objective/train/theoretical_loss": 3.77661482471806, "objective/train/tokens_used": 780677600, "objective/train/value_avg": -0.009521484375, "objective/train/value_loss": 0.0005207026842981577, "objective/train/value_max": -0.00010311603546142578, "objective/train/value_min": -0.755859375, "objective/train/value_reward_corr": 0.614135921117643, "objective/train/value_std": 0.01593017578125, "objective/train/weight_avg": 1.0001031160354614, "objective/train/weighted_lm_loss": 1.5369855165481567, "objective/train/weights_max": 1.5545350313186646, "objective/train/weights_min": 0.26852282881736755, "theoretical_loss": 3.77661482471806, "tokens_seen": 760217600 }, { "epoch": 0.23, "learning_rate": 0.0007774033060503932, "loss": 0.0804, "theoretical_loss": 3.77661482471806, "tokens_seen": 760217600 }, { "epoch": 0.23, "learning_rate": 0.0007773230621088107, "loss": 0.0797, "theoretical_loss": 3.776485546222651, "tokens_seen": 760479744 }, { "epoch": 0.23, "learning_rate": 0.0007772428181672284, "loss": 0.0783, "theoretical_loss": 3.776356324755847, "tokens_seen": 760741888 }, { "epoch": 0.23, "learning_rate": 0.000777162574225646, "loss": 0.0766, "theoretical_loss": 3.7762271602728497, "tokens_seen": 761004032 }, { "epoch": 0.23, "learning_rate": 0.0007770823302840636, "loss": 0.0784, "theoretical_loss": 3.7760980527289156, "tokens_seen": 761266176 }, { "epoch": 0.23, "learning_rate": 0.0007770020863424812, "loss": 0.0756, "theoretical_loss": 3.77596900207935, "tokens_seen": 761528320 }, { "epoch": 0.23, "learning_rate": 0.0007769218424008987, "loss": 0.076, "theoretical_loss": 3.775840008279509, "tokens_seen": 761790464 }, { "epoch": 0.23, "learning_rate": 0.0007768415984593163, "loss": 0.0785, "theoretical_loss": 3.7757110712847997, "tokens_seen": 762052608 }, { "epoch": 0.23, "learning_rate": 0.0007767613545177339, "loss": 0.0791, "theoretical_loss": 3.775582191050678, "tokens_seen": 762314752 }, { "epoch": 0.23, "learning_rate": 0.0007766811105761515, "loss": 0.0781, "theoretical_loss": 3.775453367532651, "tokens_seen": 762576896 }, { "epoch": 0.23, "learning_rate": 0.0007766008666345691, "loss": 0.0781, "theoretical_loss": 3.775324600686276, "tokens_seen": 762839040 }, { "epoch": 0.23, "learning_rate": 0.0007765206226929867, "loss": 0.0774, "theoretical_loss": 3.7751958904671614, "tokens_seen": 763101184 }, { "epoch": 0.23, "learning_rate": 0.0007764403787514042, "loss": 0.0785, "theoretical_loss": 3.7750672368309623, "tokens_seen": 763363328 }, { "epoch": 0.23, "learning_rate": 0.000776360134809822, "loss": 0.0812, "theoretical_loss": 3.7749386397333873, "tokens_seen": 763625472 }, { "epoch": 0.23, "learning_rate": 0.0007762798908682395, "loss": 0.0782, "theoretical_loss": 3.774810099130193, "tokens_seen": 763887616 }, { "epoch": 0.23, "learning_rate": 0.000776199646926657, "loss": 0.0785, "theoretical_loss": 3.7746816149771862, "tokens_seen": 764149760 }, { "epoch": 0.23, "learning_rate": 0.0007761194029850747, "loss": 0.0772, "theoretical_loss": 3.7745531872302234, "tokens_seen": 764411904 }, { "epoch": 0.23, "learning_rate": 0.0007760391590434922, "loss": 0.078, "theoretical_loss": 3.774424815845211, "tokens_seen": 764674048 }, { "epoch": 0.23, "learning_rate": 0.0007759589151019098, "loss": 0.0778, "theoretical_loss": 3.774296500778105, "tokens_seen": 764936192 }, { "epoch": 0.23, "learning_rate": 0.0007758786711603274, "loss": 0.0783, "theoretical_loss": 3.77416824198491, "tokens_seen": 765198336 }, { "epoch": 0.23, "learning_rate": 0.000775798427218745, "loss": 0.0762, "theoretical_loss": 3.7740400394216813, "tokens_seen": 765460480 }, { "epoch": 0.23, "learning_rate": 0.0007757181832771625, "loss": 0.081, "theoretical_loss": 3.7739118930445223, "tokens_seen": 765722624 }, { "epoch": 0.23, "learning_rate": 0.0007756379393355802, "loss": 0.0798, "theoretical_loss": 3.7737838028095867, "tokens_seen": 765984768 }, { "epoch": 0.23, "learning_rate": 0.0007755576953939978, "loss": 0.0771, "theoretical_loss": 3.773655768673077, "tokens_seen": 766246912 }, { "epoch": 0.23, "learning_rate": 0.0007754774514524153, "loss": 0.0804, "theoretical_loss": 3.7735277905912445, "tokens_seen": 766509056 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.0010230033658444881, "objective/train/docs_used": 283988, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5113848447799683, "objective/train/original_loss": 1.5113848447799683, "objective/train/theoretical_loss": 3.773399868520391, "objective/train/tokens_used": 787231200, "objective/train/value_avg": -0.009429931640625, "objective/train/value_loss": 0.00033447632449679077, "objective/train/value_max": -0.00022518634796142578, "objective/train/value_min": -0.59326171875, "objective/train/value_reward_corr": 0.6839028954132548, "objective/train/value_std": 0.0179901123046875, "objective/train/weight_avg": 1.0011742115020752, "objective/train/weighted_lm_loss": 1.5128288269042969, "objective/train/weights_max": 1.5810500383377075, "objective/train/weights_min": 0.370164155960083, "theoretical_loss": 3.773399868520391, "tokens_seen": 766771200 }, { "epoch": 0.23, "learning_rate": 0.000775397207510833, "loss": 0.0784, "theoretical_loss": 3.773399868520391, "tokens_seen": 766771200 }, { "epoch": 0.23, "learning_rate": 0.0007753169635692505, "loss": 0.0784, "theoretical_loss": 3.7732720024168644, "tokens_seen": 767033344 }, { "epoch": 0.23, "learning_rate": 0.0007752367196276682, "loss": 0.0771, "theoretical_loss": 3.773144192237065, "tokens_seen": 767295488 }, { "epoch": 0.23, "learning_rate": 0.0007751564756860857, "loss": 0.078, "theoretical_loss": 3.7730164379374402, "tokens_seen": 767557632 }, { "epoch": 0.23, "learning_rate": 0.0007750762317445032, "loss": 0.0775, "theoretical_loss": 3.772888739474485, "tokens_seen": 767819776 }, { "epoch": 0.23, "learning_rate": 0.000774995987802921, "loss": 0.0768, "theoretical_loss": 3.772761096804745, "tokens_seen": 768081920 }, { "epoch": 0.23, "learning_rate": 0.0007749157438613385, "loss": 0.0777, "theoretical_loss": 3.7726335098848143, "tokens_seen": 768344064 }, { "epoch": 0.23, "learning_rate": 0.0007748354999197561, "loss": 0.0773, "theoretical_loss": 3.772505978671334, "tokens_seen": 768606208 }, { "epoch": 0.23, "learning_rate": 0.0007747552559781737, "loss": 0.081, "theoretical_loss": 3.772378503120996, "tokens_seen": 768868352 }, { "epoch": 0.23, "learning_rate": 0.0007746750120365913, "loss": 0.0785, "theoretical_loss": 3.7722510831905387, "tokens_seen": 769130496 }, { "epoch": 0.23, "learning_rate": 0.0007745947680950088, "loss": 0.0791, "theoretical_loss": 3.7721237188367494, "tokens_seen": 769392640 }, { "epoch": 0.23, "learning_rate": 0.0007745145241534264, "loss": 0.0773, "theoretical_loss": 3.771996410016464, "tokens_seen": 769654784 }, { "epoch": 0.23, "learning_rate": 0.000774434280211844, "loss": 0.0756, "theoretical_loss": 3.7718691566865665, "tokens_seen": 769916928 }, { "epoch": 0.23, "learning_rate": 0.0007743540362702615, "loss": 0.0781, "theoretical_loss": 3.7717419588039887, "tokens_seen": 770179072 }, { "epoch": 0.23, "learning_rate": 0.0007742737923286792, "loss": 0.0779, "theoretical_loss": 3.7716148163257115, "tokens_seen": 770441216 }, { "epoch": 0.23, "learning_rate": 0.0007741935483870968, "loss": 0.0748, "theoretical_loss": 3.7714877292087623, "tokens_seen": 770703360 }, { "epoch": 0.23, "learning_rate": 0.0007741133044455145, "loss": 0.0772, "theoretical_loss": 3.7713606974102167, "tokens_seen": 770965504 }, { "epoch": 0.23, "learning_rate": 0.000774033060503932, "loss": 0.0807, "theoretical_loss": 3.7712337208872, "tokens_seen": 771227648 }, { "epoch": 0.23, "learning_rate": 0.0007739528165623495, "loss": 0.0764, "theoretical_loss": 3.7711067995968826, "tokens_seen": 771489792 }, { "epoch": 0.23, "learning_rate": 0.0007738725726207672, "loss": 0.0797, "theoretical_loss": 3.770979933496485, "tokens_seen": 771751936 }, { "epoch": 0.23, "learning_rate": 0.0007737923286791847, "loss": 0.0769, "theoretical_loss": 3.770853122543274, "tokens_seen": 772014080 }, { "epoch": 0.23, "learning_rate": 0.0007737120847376023, "loss": 0.0752, "theoretical_loss": 3.770726366694564, "tokens_seen": 772276224 }, { "epoch": 0.23, "learning_rate": 0.0007736318407960199, "loss": 0.0788, "theoretical_loss": 3.7705996659077172, "tokens_seen": 772538368 }, { "epoch": 0.23, "learning_rate": 0.0007735515968544375, "loss": 0.0765, "theoretical_loss": 3.770473020140143, "tokens_seen": 772800512 }, { "epoch": 0.23, "learning_rate": 0.000773471352912855, "loss": 0.0816, "theoretical_loss": 3.770346429349299, "tokens_seen": 773062656 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.0005200718878768384, "objective/train/docs_used": 286255, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5496317148208618, "objective/train/original_loss": 1.5496317148208618, "objective/train/theoretical_loss": 3.7702198934926896, "objective/train/tokens_used": 793784800, "objective/train/value_avg": -0.00848388671875, "objective/train/value_loss": 0.0003523084451444447, "objective/train/value_max": -0.0001302957534790039, "objective/train/value_min": -0.4052734375, "objective/train/value_reward_corr": 0.5911265747381873, "objective/train/value_std": 0.01352691650390625, "objective/train/weight_avg": 1.000672698020935, "objective/train/weighted_lm_loss": 1.550767421722412, "objective/train/weights_max": 1.2783743143081665, "objective/train/weights_min": 0.36907142400741577, "theoretical_loss": 3.7702198934926896, "tokens_seen": 773324800 }, { "epoch": 0.23, "learning_rate": 0.0007733911089712728, "loss": 0.0771, "theoretical_loss": 3.7702198934926896, "tokens_seen": 773324800 }, { "epoch": 0.23, "learning_rate": 0.0007733108650296903, "loss": 0.0765, "theoretical_loss": 3.7700934125278653, "tokens_seen": 773586944 }, { "epoch": 0.23, "learning_rate": 0.0007732306210881078, "loss": 0.0802, "theoretical_loss": 3.7699669864124266, "tokens_seen": 773849088 }, { "epoch": 0.23, "learning_rate": 0.0007731503771465255, "loss": 0.0774, "theoretical_loss": 3.769840615104018, "tokens_seen": 774111232 }, { "epoch": 0.23, "learning_rate": 0.000773070133204943, "loss": 0.0806, "theoretical_loss": 3.7697142985603325, "tokens_seen": 774373376 }, { "epoch": 0.23, "learning_rate": 0.0007729898892633606, "loss": 0.0798, "theoretical_loss": 3.76958803673911, "tokens_seen": 774635520 }, { "epoch": 0.23, "learning_rate": 0.0007729096453217782, "loss": 0.0797, "theoretical_loss": 3.7694618295981375, "tokens_seen": 774897664 }, { "epoch": 0.23, "learning_rate": 0.0007728294013801958, "loss": 0.0777, "theoretical_loss": 3.769335677095248, "tokens_seen": 775159808 }, { "epoch": 0.23, "learning_rate": 0.0007727491574386135, "loss": 0.0804, "theoretical_loss": 3.769209579188323, "tokens_seen": 775421952 }, { "epoch": 0.24, "learning_rate": 0.000772668913497031, "loss": 0.0778, "theoretical_loss": 3.7690835358352883, "tokens_seen": 775684096 }, { "epoch": 0.24, "learning_rate": 0.0007725886695554486, "loss": 0.0804, "theoretical_loss": 3.7689575469941183, "tokens_seen": 775946240 }, { "epoch": 0.24, "learning_rate": 0.0007725084256138662, "loss": 0.079, "theoretical_loss": 3.768831612622833, "tokens_seen": 776208384 }, { "epoch": 0.24, "learning_rate": 0.0007724281816722838, "loss": 0.0773, "theoretical_loss": 3.7687057326794986, "tokens_seen": 776470528 }, { "epoch": 0.24, "learning_rate": 0.0007723479377307013, "loss": 0.0805, "theoretical_loss": 3.768579907122229, "tokens_seen": 776732672 }, { "epoch": 0.24, "learning_rate": 0.000772267693789119, "loss": 0.078, "theoretical_loss": 3.768454135909183, "tokens_seen": 776994816 }, { "epoch": 0.24, "learning_rate": 0.0007721874498475365, "loss": 0.078, "theoretical_loss": 3.768328418998567, "tokens_seen": 777256960 }, { "epoch": 0.24, "learning_rate": 0.000772107205905954, "loss": 0.0777, "theoretical_loss": 3.7682027563486327, "tokens_seen": 777519104 }, { "epoch": 0.24, "learning_rate": 0.0007720269619643718, "loss": 0.0805, "theoretical_loss": 3.768077147917678, "tokens_seen": 777781248 }, { "epoch": 0.24, "learning_rate": 0.0007719467180227893, "loss": 0.0771, "theoretical_loss": 3.7679515936640477, "tokens_seen": 778043392 }, { "epoch": 0.24, "learning_rate": 0.0007718664740812069, "loss": 0.0757, "theoretical_loss": 3.7678260935461316, "tokens_seen": 778305536 }, { "epoch": 0.24, "learning_rate": 0.0007717862301396245, "loss": 0.074, "theoretical_loss": 3.767700647522366, "tokens_seen": 778567680 }, { "epoch": 0.24, "learning_rate": 0.0007717059861980421, "loss": 0.0735, "theoretical_loss": 3.7675752555512334, "tokens_seen": 778829824 }, { "epoch": 0.24, "learning_rate": 0.0007716257422564597, "loss": 0.0782, "theoretical_loss": 3.7674499175912617, "tokens_seen": 779091968 }, { "epoch": 0.24, "learning_rate": 0.0007715454983148772, "loss": 0.0774, "theoretical_loss": 3.767324633601024, "tokens_seen": 779354112 }, { "epoch": 0.24, "learning_rate": 0.0007714652543732948, "loss": 0.0786, "theoretical_loss": 3.7671994035391405, "tokens_seen": 779616256 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0016698199324309826, "objective/train/docs_used": 288522, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6363639831542969, "objective/train/original_loss": 1.6363639831542969, "objective/train/theoretical_loss": 3.767074227364275, "objective/train/tokens_used": 800338400, "objective/train/value_avg": -0.00970458984375, "objective/train/value_loss": 0.0003810957132373005, "objective/train/value_max": -0.00016224384307861328, "objective/train/value_min": -0.76025390625, "objective/train/value_reward_corr": 0.7845589972125679, "objective/train/value_std": 0.0241241455078125, "objective/train/weight_avg": 1.0018428564071655, "objective/train/weighted_lm_loss": 1.6393061876296997, "objective/train/weights_max": 1.5092625617980957, "objective/train/weights_min": 0.36901721358299255, "theoretical_loss": 3.767074227364275, "tokens_seen": 779878400 }, { "epoch": 0.24, "learning_rate": 0.0007713850104317124, "loss": 0.0804, "theoretical_loss": 3.767074227364275, "tokens_seen": 779878400 }, { "epoch": 0.24, "learning_rate": 0.00077130476649013, "loss": 0.0804, "theoretical_loss": 3.7669491050351396, "tokens_seen": 780140544 }, { "epoch": 0.24, "learning_rate": 0.0007712245225485476, "loss": 0.0792, "theoretical_loss": 3.7668240365104895, "tokens_seen": 780402688 }, { "epoch": 0.24, "learning_rate": 0.0007711442786069653, "loss": 0.0796, "theoretical_loss": 3.7666990217491265, "tokens_seen": 780664832 }, { "epoch": 0.24, "learning_rate": 0.0007710640346653828, "loss": 0.0777, "theoretical_loss": 3.7665740607098974, "tokens_seen": 780926976 }, { "epoch": 0.24, "learning_rate": 0.0007709837907238003, "loss": 0.077, "theoretical_loss": 3.7664491533516946, "tokens_seen": 781189120 }, { "epoch": 0.24, "learning_rate": 0.000770903546782218, "loss": 0.0771, "theoretical_loss": 3.766324299633455, "tokens_seen": 781451264 }, { "epoch": 0.24, "learning_rate": 0.0007708233028406355, "loss": 0.0786, "theoretical_loss": 3.766199499514162, "tokens_seen": 781713408 }, { "epoch": 0.24, "learning_rate": 0.0007707430588990531, "loss": 0.0753, "theoretical_loss": 3.7660747529528424, "tokens_seen": 781975552 }, { "epoch": 0.24, "learning_rate": 0.0007706628149574707, "loss": 0.0789, "theoretical_loss": 3.76595005990857, "tokens_seen": 782237696 }, { "epoch": 0.24, "learning_rate": 0.0007705825710158883, "loss": 0.0751, "theoretical_loss": 3.7658254203404615, "tokens_seen": 782499840 }, { "epoch": 0.24, "learning_rate": 0.0007705023270743058, "loss": 0.0771, "theoretical_loss": 3.7657008342076796, "tokens_seen": 782761984 }, { "epoch": 0.24, "learning_rate": 0.0007704220831327236, "loss": 0.076, "theoretical_loss": 3.765576301469433, "tokens_seen": 783024128 }, { "epoch": 0.24, "learning_rate": 0.0007703418391911411, "loss": 0.0775, "theoretical_loss": 3.7654518220849726, "tokens_seen": 783286272 }, { "epoch": 0.24, "learning_rate": 0.0007702615952495587, "loss": 0.0768, "theoretical_loss": 3.7653273960135962, "tokens_seen": 783548416 }, { "epoch": 0.24, "learning_rate": 0.0007701813513079763, "loss": 0.0809, "theoretical_loss": 3.765203023214645, "tokens_seen": 783810560 }, { "epoch": 0.24, "learning_rate": 0.0007701011073663938, "loss": 0.0754, "theoretical_loss": 3.7650787036475055, "tokens_seen": 784072704 }, { "epoch": 0.24, "learning_rate": 0.0007700208634248115, "loss": 0.0765, "theoretical_loss": 3.764954437271608, "tokens_seen": 784334848 }, { "epoch": 0.24, "learning_rate": 0.000769940619483229, "loss": 0.0718, "theoretical_loss": 3.7648302240464284, "tokens_seen": 784596992 }, { "epoch": 0.24, "learning_rate": 0.0007698603755416466, "loss": 0.0762, "theoretical_loss": 3.764706063931486, "tokens_seen": 784859136 }, { "epoch": 0.24, "learning_rate": 0.0007697801316000643, "loss": 0.0783, "theoretical_loss": 3.764581956886345, "tokens_seen": 785121280 }, { "epoch": 0.24, "learning_rate": 0.0007696998876584818, "loss": 0.0777, "theoretical_loss": 3.7644579028706135, "tokens_seen": 785383424 }, { "epoch": 0.24, "learning_rate": 0.0007696196437168994, "loss": 0.0756, "theoretical_loss": 3.764333901843944, "tokens_seen": 785645568 }, { "epoch": 0.24, "learning_rate": 0.000769539399775317, "loss": 0.0771, "theoretical_loss": 3.764209953766033, "tokens_seen": 785907712 }, { "epoch": 0.24, "learning_rate": 0.0007694591558337346, "loss": 0.0753, "theoretical_loss": 3.7640860585966207, "tokens_seen": 786169856 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0009356484515592456, "objective/train/docs_used": 290810, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.523367166519165, "objective/train/original_loss": 1.523367166519165, "objective/train/theoretical_loss": 3.763962216295493, "objective/train/tokens_used": 806892000, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.0002046849112957716, "objective/train/value_max": -0.0001838207244873047, "objective/train/value_min": -0.347900390625, "objective/train/value_reward_corr": 0.6702344519054119, "objective/train/value_std": 0.01255035400390625, "objective/train/weight_avg": 1.0010277032852173, "objective/train/weighted_lm_loss": 1.5253008604049683, "objective/train/weights_max": 1.1150742769241333, "objective/train/weights_min": 0.3685617744922638, "theoretical_loss": 3.763962216295493, "tokens_seen": 786432000 }, { "epoch": 0.24, "learning_rate": 0.0007693789118921521, "loss": 0.0757, "theoretical_loss": 3.763962216295493, "tokens_seen": 786432000 }, { "epoch": 0.24, "learning_rate": 0.0007692986679505698, "loss": 0.0761, "theoretical_loss": 3.7638384268224776, "tokens_seen": 786694144 }, { "epoch": 0.24, "learning_rate": 0.0007692184240089873, "loss": 0.0773, "theoretical_loss": 3.7637146901374474, "tokens_seen": 786956288 }, { "epoch": 0.24, "learning_rate": 0.0007691381800674048, "loss": 0.0785, "theoretical_loss": 3.7635910062003193, "tokens_seen": 787218432 }, { "epoch": 0.24, "learning_rate": 0.0007690579361258225, "loss": 0.0765, "theoretical_loss": 3.7634673749710523, "tokens_seen": 787480576 }, { "epoch": 0.24, "learning_rate": 0.00076897769218424, "loss": 0.0753, "theoretical_loss": 3.763343796409651, "tokens_seen": 787742720 }, { "epoch": 0.24, "learning_rate": 0.0007688974482426578, "loss": 0.076, "theoretical_loss": 3.7632202704761637, "tokens_seen": 788004864 }, { "epoch": 0.24, "learning_rate": 0.0007688172043010753, "loss": 0.0756, "theoretical_loss": 3.7630967971306797, "tokens_seen": 788267008 }, { "epoch": 0.24, "learning_rate": 0.0007687369603594929, "loss": 0.0767, "theoretical_loss": 3.762973376333335, "tokens_seen": 788529152 }, { "epoch": 0.24, "learning_rate": 0.0007686567164179105, "loss": 0.0775, "theoretical_loss": 3.7628500080443077, "tokens_seen": 788791296 }, { "epoch": 0.24, "learning_rate": 0.000768576472476328, "loss": 0.077, "theoretical_loss": 3.7627266922238185, "tokens_seen": 789053440 }, { "epoch": 0.24, "learning_rate": 0.0007684962285347456, "loss": 0.0768, "theoretical_loss": 3.762603428832133, "tokens_seen": 789315584 }, { "epoch": 0.24, "learning_rate": 0.0007684159845931632, "loss": 0.0751, "theoretical_loss": 3.7624802178295584, "tokens_seen": 789577728 }, { "epoch": 0.24, "learning_rate": 0.0007683357406515808, "loss": 0.0778, "theoretical_loss": 3.762357059176447, "tokens_seen": 789839872 }, { "epoch": 0.24, "learning_rate": 0.0007682554967099984, "loss": 0.0773, "theoretical_loss": 3.762233952833193, "tokens_seen": 790102016 }, { "epoch": 0.24, "learning_rate": 0.0007681752527684161, "loss": 0.0798, "theoretical_loss": 3.7621108987602336, "tokens_seen": 790364160 }, { "epoch": 0.24, "learning_rate": 0.0007680950088268336, "loss": 0.0764, "theoretical_loss": 3.76198789691805, "tokens_seen": 790626304 }, { "epoch": 0.24, "learning_rate": 0.0007680147648852511, "loss": 0.0798, "theoretical_loss": 3.7618649472671652, "tokens_seen": 790888448 }, { "epoch": 0.24, "learning_rate": 0.0007679345209436688, "loss": 0.0781, "theoretical_loss": 3.761742049768146, "tokens_seen": 791150592 }, { "epoch": 0.24, "learning_rate": 0.0007678542770020863, "loss": 0.0789, "theoretical_loss": 3.761619204381602, "tokens_seen": 791412736 }, { "epoch": 0.24, "learning_rate": 0.000767774033060504, "loss": 0.0751, "theoretical_loss": 3.7614964110681846, "tokens_seen": 791674880 }, { "epoch": 0.24, "learning_rate": 0.0007676937891189215, "loss": 0.0769, "theoretical_loss": 3.761373669788589, "tokens_seen": 791937024 }, { "epoch": 0.24, "learning_rate": 0.0007676135451773391, "loss": 0.0765, "theoretical_loss": 3.7612509805035526, "tokens_seen": 792199168 }, { "epoch": 0.24, "learning_rate": 0.0007675333012357568, "loss": 0.075, "theoretical_loss": 3.761128343173856, "tokens_seen": 792461312 }, { "epoch": 0.24, "learning_rate": 0.0007674530572941743, "loss": 0.0755, "theoretical_loss": 3.7610057577603215, "tokens_seen": 792723456 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0012367293238639832, "objective/train/docs_used": 293128, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5767234563827515, "objective/train/original_loss": 1.576723575592041, "objective/train/theoretical_loss": 3.7608832242238144, "objective/train/tokens_used": 813445600, "objective/train/value_avg": -0.0086669921875, "objective/train/value_loss": 0.00022169639123603702, "objective/train/value_max": -0.0002472400665283203, "objective/train/value_min": -0.58544921875, "objective/train/value_reward_corr": 0.6488604890647144, "objective/train/value_std": 0.0127410888671875, "objective/train/weight_avg": 1.001336932182312, "objective/train/weighted_lm_loss": 1.5789175033569336, "objective/train/weights_max": 1.3074339628219604, "objective/train/weights_min": 0.36864086985588074, "theoretical_loss": 3.7608832242238144, "tokens_seen": 792985600 }, { "epoch": 0.24, "learning_rate": 0.0007673728133525919, "loss": 0.0782, "theoretical_loss": 3.7608832242238144, "tokens_seen": 792985600 }, { "epoch": 0.24, "learning_rate": 0.0007672925694110095, "loss": 0.0778, "theoretical_loss": 3.7607607425252416, "tokens_seen": 793247744 }, { "epoch": 0.24, "learning_rate": 0.0007672123254694271, "loss": 0.0788, "theoretical_loss": 3.7606383126255536, "tokens_seen": 793509888 }, { "epoch": 0.24, "learning_rate": 0.0007671320815278446, "loss": 0.0792, "theoretical_loss": 3.760515934485743, "tokens_seen": 793772032 }, { "epoch": 0.24, "learning_rate": 0.0007670518375862623, "loss": 0.0776, "theoretical_loss": 3.760393608066843, "tokens_seen": 794034176 }, { "epoch": 0.24, "learning_rate": 0.0007669715936446798, "loss": 0.0759, "theoretical_loss": 3.760271333329932, "tokens_seen": 794296320 }, { "epoch": 0.24, "learning_rate": 0.0007668913497030974, "loss": 0.0779, "theoretical_loss": 3.7601491102361275, "tokens_seen": 794558464 }, { "epoch": 0.24, "learning_rate": 0.000766811105761515, "loss": 0.0776, "theoretical_loss": 3.7600269387465914, "tokens_seen": 794820608 }, { "epoch": 0.24, "learning_rate": 0.0007667308618199326, "loss": 0.078, "theoretical_loss": 3.759904818822525, "tokens_seen": 795082752 }, { "epoch": 0.24, "learning_rate": 0.0007666506178783502, "loss": 0.0787, "theoretical_loss": 3.759782750425175, "tokens_seen": 795344896 }, { "epoch": 0.24, "learning_rate": 0.0007665703739367678, "loss": 0.076, "theoretical_loss": 3.759660733515826, "tokens_seen": 795607040 }, { "epoch": 0.24, "learning_rate": 0.0007664901299951854, "loss": 0.0765, "theoretical_loss": 3.7595387680558088, "tokens_seen": 795869184 }, { "epoch": 0.24, "learning_rate": 0.000766409886053603, "loss": 0.0791, "theoretical_loss": 3.759416854006492, "tokens_seen": 796131328 }, { "epoch": 0.24, "learning_rate": 0.0007663296421120206, "loss": 0.0793, "theoretical_loss": 3.7592949913292886, "tokens_seen": 796393472 }, { "epoch": 0.24, "learning_rate": 0.0007662493981704381, "loss": 0.0786, "theoretical_loss": 3.759173179985652, "tokens_seen": 796655616 }, { "epoch": 0.24, "learning_rate": 0.0007661691542288557, "loss": 0.0747, "theoretical_loss": 3.7590514199370775, "tokens_seen": 796917760 }, { "epoch": 0.24, "learning_rate": 0.0007660889102872733, "loss": 0.0735, "theoretical_loss": 3.758929711145101, "tokens_seen": 797179904 }, { "epoch": 0.24, "learning_rate": 0.0007660086663456909, "loss": 0.0756, "theoretical_loss": 3.758808053571302, "tokens_seen": 797442048 }, { "epoch": 0.24, "learning_rate": 0.0007659284224041086, "loss": 0.0805, "theoretical_loss": 3.7586864471772996, "tokens_seen": 797704192 }, { "epoch": 0.24, "learning_rate": 0.0007658481784625261, "loss": 0.0771, "theoretical_loss": 3.758564891924755, "tokens_seen": 797966336 }, { "epoch": 0.24, "learning_rate": 0.0007657679345209437, "loss": 0.0792, "theoretical_loss": 3.758443387775371, "tokens_seen": 798228480 }, { "epoch": 0.24, "learning_rate": 0.0007656876905793613, "loss": 0.0762, "theoretical_loss": 3.7583219346908905, "tokens_seen": 798490624 }, { "epoch": 0.24, "learning_rate": 0.0007656074466377788, "loss": 0.077, "theoretical_loss": 3.758200532633099, "tokens_seen": 798752768 }, { "epoch": 0.24, "learning_rate": 0.0007655272026961964, "loss": 0.0767, "theoretical_loss": 3.7580791815638213, "tokens_seen": 799014912 }, { "epoch": 0.24, "learning_rate": 0.000765446958754614, "loss": 0.0782, "theoretical_loss": 3.7579578814449253, "tokens_seen": 799277056 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0012315761996433139, "objective/train/docs_used": 295724, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5393258333206177, "objective/train/original_loss": 1.5393258333206177, "objective/train/theoretical_loss": 3.7578366322383188, "objective/train/tokens_used": 819999200, "objective/train/value_avg": -0.007904052734375, "objective/train/value_loss": 0.00038409262197092175, "objective/train/value_max": -0.00012433528900146484, "objective/train/value_min": -0.87158203125, "objective/train/value_reward_corr": 0.6161805024099198, "objective/train/value_std": 0.0164794921875, "objective/train/weight_avg": 1.0014045238494873, "objective/train/weighted_lm_loss": 1.5412237644195557, "objective/train/weights_max": 1.616646409034729, "objective/train/weights_min": 0.3682824373245239, "theoretical_loss": 3.7578366322383188, "tokens_seen": 799539200 }, { "epoch": 0.24, "learning_rate": 0.0007653667148130316, "loss": 0.0778, "theoretical_loss": 3.7578366322383188, "tokens_seen": 799539200 }, { "epoch": 0.24, "learning_rate": 0.0007652864708714491, "loss": 0.0762, "theoretical_loss": 3.7577154339059504, "tokens_seen": 799801344 }, { "epoch": 0.24, "learning_rate": 0.0007652062269298669, "loss": 0.0768, "theoretical_loss": 3.7575942864098106, "tokens_seen": 800063488 }, { "epoch": 0.24, "learning_rate": 0.0007651259829882844, "loss": 0.0755, "theoretical_loss": 3.75747318971193, "tokens_seen": 800325632 }, { "epoch": 0.24, "learning_rate": 0.000765045739046702, "loss": 0.0757, "theoretical_loss": 3.7573521437743795, "tokens_seen": 800587776 }, { "epoch": 0.24, "learning_rate": 0.0007649654951051196, "loss": 0.0775, "theoretical_loss": 3.7572311485592715, "tokens_seen": 800849920 }, { "epoch": 0.24, "learning_rate": 0.0007648852511635371, "loss": 0.0754, "theoretical_loss": 3.7571102040287596, "tokens_seen": 801112064 }, { "epoch": 0.24, "learning_rate": 0.0007648050072219548, "loss": 0.0803, "theoretical_loss": 3.7569893101450367, "tokens_seen": 801374208 }, { "epoch": 0.24, "learning_rate": 0.0007647247632803723, "loss": 0.0804, "theoretical_loss": 3.756868466870337, "tokens_seen": 801636352 }, { "epoch": 0.24, "learning_rate": 0.0007646445193387899, "loss": 0.078, "theoretical_loss": 3.7567476741669346, "tokens_seen": 801898496 }, { "epoch": 0.24, "learning_rate": 0.0007645642753972076, "loss": 0.0778, "theoretical_loss": 3.756626931997145, "tokens_seen": 802160640 }, { "epoch": 0.24, "learning_rate": 0.0007644840314556251, "loss": 0.0745, "theoretical_loss": 3.7565062403233234, "tokens_seen": 802422784 }, { "epoch": 0.24, "learning_rate": 0.0007644037875140427, "loss": 0.0742, "theoretical_loss": 3.7563855991078654, "tokens_seen": 802684928 }, { "epoch": 0.24, "learning_rate": 0.0007643235435724603, "loss": 0.0725, "theoretical_loss": 3.7562650083132074, "tokens_seen": 802947072 }, { "epoch": 0.24, "learning_rate": 0.0007642432996308779, "loss": 0.0782, "theoretical_loss": 3.756144467901825, "tokens_seen": 803209216 }, { "epoch": 0.24, "learning_rate": 0.0007641630556892954, "loss": 0.0777, "theoretical_loss": 3.756023977836235, "tokens_seen": 803471360 }, { "epoch": 0.24, "learning_rate": 0.0007640828117477131, "loss": 0.0767, "theoretical_loss": 3.755903538078994, "tokens_seen": 803733504 }, { "epoch": 0.24, "learning_rate": 0.0007640025678061306, "loss": 0.0803, "theoretical_loss": 3.7557831485926982, "tokens_seen": 803995648 }, { "epoch": 0.24, "learning_rate": 0.0007639223238645483, "loss": 0.0786, "theoretical_loss": 3.7556628093399835, "tokens_seen": 804257792 }, { "epoch": 0.24, "learning_rate": 0.0007638420799229659, "loss": 0.0802, "theoretical_loss": 3.7555425202835275, "tokens_seen": 804519936 }, { "epoch": 0.24, "learning_rate": 0.0007637618359813834, "loss": 0.0736, "theoretical_loss": 3.7554222813860463, "tokens_seen": 804782080 }, { "epoch": 0.24, "learning_rate": 0.0007636815920398011, "loss": 0.0769, "theoretical_loss": 3.7553020926102954, "tokens_seen": 805044224 }, { "epoch": 0.24, "learning_rate": 0.0007636013480982186, "loss": 0.0786, "theoretical_loss": 3.755181953919071, "tokens_seen": 805306368 }, { "epoch": 0.24, "learning_rate": 0.0007635211041566362, "loss": 0.0769, "theoretical_loss": 3.755061865275209, "tokens_seen": 805568512 }, { "epoch": 0.24, "learning_rate": 0.0007634408602150538, "loss": 0.0779, "theoretical_loss": 3.754941826641584, "tokens_seen": 805830656 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0008438312797807157, "objective/train/docs_used": 297892, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4582135677337646, "objective/train/original_loss": 1.458213448524475, "objective/train/theoretical_loss": 3.754821837981112, "objective/train/tokens_used": 826552800, "objective/train/value_avg": -0.005859375, "objective/train/value_loss": 0.0003289504675194621, "objective/train/value_max": -0.00010973215103149414, "objective/train/value_min": -0.73828125, "objective/train/value_reward_corr": 0.4976893790214529, "objective/train/value_std": 0.0099029541015625, "objective/train/weight_avg": 1.0009607076644897, "objective/train/weighted_lm_loss": 1.4596952199935913, "objective/train/weights_max": 1.2155424356460571, "objective/train/weights_min": 0.05370466783642769, "theoretical_loss": 3.754821837981112, "tokens_seen": 806092800 }, { "epoch": 0.24, "learning_rate": 0.0007633606162734714, "loss": 0.0753, "theoretical_loss": 3.754821837981112, "tokens_seen": 806092800 }, { "epoch": 0.24, "learning_rate": 0.0007632803723318889, "loss": 0.0759, "theoretical_loss": 3.7547018992567462, "tokens_seen": 806354944 }, { "epoch": 0.24, "learning_rate": 0.0007632001283903065, "loss": 0.079, "theoretical_loss": 3.7545820104314815, "tokens_seen": 806617088 }, { "epoch": 0.24, "learning_rate": 0.0007631198844487241, "loss": 0.0746, "theoretical_loss": 3.7544621714683517, "tokens_seen": 806879232 }, { "epoch": 0.24, "learning_rate": 0.0007630396405071417, "loss": 0.0763, "theoretical_loss": 3.754342382330428, "tokens_seen": 807141376 }, { "epoch": 0.24, "learning_rate": 0.0007629593965655594, "loss": 0.0746, "theoretical_loss": 3.7542226429808236, "tokens_seen": 807403520 }, { "epoch": 0.24, "learning_rate": 0.0007628791526239769, "loss": 0.0742, "theoretical_loss": 3.7541029533826893, "tokens_seen": 807665664 }, { "epoch": 0.24, "learning_rate": 0.0007627989086823945, "loss": 0.0753, "theoretical_loss": 3.7539833134992158, "tokens_seen": 807927808 }, { "epoch": 0.24, "learning_rate": 0.0007627186647408121, "loss": 0.0783, "theoretical_loss": 3.753863723293634, "tokens_seen": 808189952 }, { "epoch": 0.24, "learning_rate": 0.0007626384207992296, "loss": 0.0766, "theoretical_loss": 3.7537441827292106, "tokens_seen": 808452096 }, { "epoch": 0.25, "learning_rate": 0.0007625581768576473, "loss": 0.077, "theoretical_loss": 3.753624691769255, "tokens_seen": 808714240 }, { "epoch": 0.25, "learning_rate": 0.0007624779329160648, "loss": 0.079, "theoretical_loss": 3.7535052503771142, "tokens_seen": 808976384 }, { "epoch": 0.25, "learning_rate": 0.0007623976889744824, "loss": 0.0756, "theoretical_loss": 3.7533858585161735, "tokens_seen": 809238528 }, { "epoch": 0.25, "learning_rate": 0.0007623174450329001, "loss": 0.0796, "theoretical_loss": 3.753266516149858, "tokens_seen": 809500672 }, { "epoch": 0.25, "learning_rate": 0.0007622372010913177, "loss": 0.0766, "theoretical_loss": 3.7531472232416316, "tokens_seen": 809762816 }, { "epoch": 0.25, "learning_rate": 0.0007621569571497352, "loss": 0.0736, "theoretical_loss": 3.7530279797549957, "tokens_seen": 810024960 }, { "epoch": 0.25, "learning_rate": 0.0007620767132081528, "loss": 0.0748, "theoretical_loss": 3.752908785653492, "tokens_seen": 810287104 }, { "epoch": 0.25, "learning_rate": 0.0007619964692665704, "loss": 0.0756, "theoretical_loss": 3.7527896409007004, "tokens_seen": 810549248 }, { "epoch": 0.25, "learning_rate": 0.0007619162253249879, "loss": 0.078, "theoretical_loss": 3.7526705454602394, "tokens_seen": 810811392 }, { "epoch": 0.25, "learning_rate": 0.0007618359813834056, "loss": 0.077, "theoretical_loss": 3.752551499295766, "tokens_seen": 811073536 }, { "epoch": 0.25, "learning_rate": 0.0007617557374418231, "loss": 0.0766, "theoretical_loss": 3.7524325023709757, "tokens_seen": 811335680 }, { "epoch": 0.25, "learning_rate": 0.0007616754935002407, "loss": 0.0736, "theoretical_loss": 3.7523135546496023, "tokens_seen": 811597824 }, { "epoch": 0.25, "learning_rate": 0.0007615952495586584, "loss": 0.0772, "theoretical_loss": 3.7521946560954182, "tokens_seen": 811859968 }, { "epoch": 0.25, "learning_rate": 0.0007615150056170759, "loss": 0.0767, "theoretical_loss": 3.7520758066722344, "tokens_seen": 812122112 }, { "epoch": 0.25, "learning_rate": 0.0007614347616754936, "loss": 0.075, "theoretical_loss": 3.7519570063438996, "tokens_seen": 812384256 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0014068076852709055, "objective/train/docs_used": 300102, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5233573913574219, "objective/train/original_loss": 1.5233573913574219, "objective/train/theoretical_loss": 3.7518382550743024, "objective/train/tokens_used": 833106400, "objective/train/value_avg": -0.006847381591796875, "objective/train/value_loss": 0.00014167482731863856, "objective/train/value_max": -0.0001150369644165039, "objective/train/value_min": -0.31005859375, "objective/train/value_reward_corr": 0.5786646527156373, "objective/train/value_std": 0.01003265380859375, "objective/train/weight_avg": 1.0014715194702148, "objective/train/weighted_lm_loss": 1.5256245136260986, "objective/train/weights_max": 1.1971921920776367, "objective/train/weights_min": 0.36875900626182556, "theoretical_loss": 3.7518382550743024, "tokens_seen": 812646400 }, { "epoch": 0.25, "learning_rate": 0.0007613545177339111, "loss": 0.0763, "theoretical_loss": 3.7518382550743024, "tokens_seen": 812646400 }, { "epoch": 0.25, "learning_rate": 0.0007612742737923287, "loss": 0.0777, "theoretical_loss": 3.7517195528273666, "tokens_seen": 812908544 }, { "epoch": 0.25, "learning_rate": 0.0007611940298507463, "loss": 0.0758, "theoretical_loss": 3.751600899567057, "tokens_seen": 813170688 }, { "epoch": 0.25, "learning_rate": 0.0007611137859091639, "loss": 0.0724, "theoretical_loss": 3.7514822952573743, "tokens_seen": 813432832 }, { "epoch": 0.25, "learning_rate": 0.0007610335419675814, "loss": 0.0772, "theoretical_loss": 3.7513637398623603, "tokens_seen": 813694976 }, { "epoch": 0.25, "learning_rate": 0.0007609532980259991, "loss": 0.0756, "theoretical_loss": 3.751245233346091, "tokens_seen": 813957120 }, { "epoch": 0.25, "learning_rate": 0.0007608730540844167, "loss": 0.0768, "theoretical_loss": 3.7511267756726823, "tokens_seen": 814219264 }, { "epoch": 0.25, "learning_rate": 0.0007607928101428342, "loss": 0.0783, "theoretical_loss": 3.7510083668062886, "tokens_seen": 814481408 }, { "epoch": 0.25, "learning_rate": 0.0007607125662012519, "loss": 0.075, "theoretical_loss": 3.750890006711101, "tokens_seen": 814743552 }, { "epoch": 0.25, "learning_rate": 0.0007606323222596694, "loss": 0.075, "theoretical_loss": 3.7507716953513492, "tokens_seen": 815005696 }, { "epoch": 0.25, "learning_rate": 0.000760552078318087, "loss": 0.0784, "theoretical_loss": 3.7506534326912995, "tokens_seen": 815267840 }, { "epoch": 0.25, "learning_rate": 0.0007604718343765046, "loss": 0.0769, "theoretical_loss": 3.7505352186952567, "tokens_seen": 815529984 }, { "epoch": 0.25, "learning_rate": 0.0007603915904349222, "loss": 0.0738, "theoretical_loss": 3.7504170533275634, "tokens_seen": 815792128 }, { "epoch": 0.25, "learning_rate": 0.0007603113464933397, "loss": 0.0732, "theoretical_loss": 3.7502989365526, "tokens_seen": 816054272 }, { "epoch": 0.25, "learning_rate": 0.0007602311025517573, "loss": 0.0762, "theoretical_loss": 3.7501808683347826, "tokens_seen": 816316416 }, { "epoch": 0.25, "learning_rate": 0.000760150858610175, "loss": 0.0751, "theoretical_loss": 3.7500628486385668, "tokens_seen": 816578560 }, { "epoch": 0.25, "learning_rate": 0.0007600706146685926, "loss": 0.0744, "theoretical_loss": 3.7499448774284447, "tokens_seen": 816840704 }, { "epoch": 0.25, "learning_rate": 0.0007599903707270102, "loss": 0.0771, "theoretical_loss": 3.749826954668946, "tokens_seen": 817102848 }, { "epoch": 0.25, "learning_rate": 0.0007599101267854277, "loss": 0.077, "theoretical_loss": 3.7497090803246387, "tokens_seen": 817364992 }, { "epoch": 0.25, "learning_rate": 0.0007598298828438454, "loss": 0.0749, "theoretical_loss": 3.7495912543601246, "tokens_seen": 817627136 }, { "epoch": 0.25, "learning_rate": 0.0007597496389022629, "loss": 0.0775, "theoretical_loss": 3.7494734767400475, "tokens_seen": 817889280 }, { "epoch": 0.25, "learning_rate": 0.0007596693949606804, "loss": 0.0766, "theoretical_loss": 3.7493557474290853, "tokens_seen": 818151424 }, { "epoch": 0.25, "learning_rate": 0.0007595891510190981, "loss": 0.0743, "theoretical_loss": 3.7492380663919533, "tokens_seen": 818413568 }, { "epoch": 0.25, "learning_rate": 0.0007595089070775156, "loss": 0.0762, "theoretical_loss": 3.7491204335934043, "tokens_seen": 818675712 }, { "epoch": 0.25, "learning_rate": 0.0007594286631359332, "loss": 0.076, "theoretical_loss": 3.7490028489982286, "tokens_seen": 818937856 }, { "epoch": 0.25, "objective/train/advantage_avg": -2.009487616305705e-05, "objective/train/docs_used": 302476, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5615952014923096, "objective/train/original_loss": 1.5615952014923096, "objective/train/theoretical_loss": 3.7488853125712525, "objective/train/tokens_used": 839660000, "objective/train/value_avg": -0.0078582763671875, "objective/train/value_loss": 0.00040608173003420234, "objective/train/value_max": -0.0001323223114013672, "objective/train/value_min": -0.56494140625, "objective/train/value_reward_corr": 0.58299361157949, "objective/train/value_std": 0.01151275634765625, "objective/train/weight_avg": 1.0001566410064697, "objective/train/weighted_lm_loss": 1.563400387763977, "objective/train/weights_max": 1.6039460897445679, "objective/train/weights_min": 0.3728724420070648, "theoretical_loss": 3.7488853125712525, "tokens_seen": 819200000 }, { "epoch": 0.25, "learning_rate": 0.0007593484191943509, "loss": 0.0727, "theoretical_loss": 3.7488853125712525, "tokens_seen": 819200000 }, { "epoch": 0.25, "learning_rate": 0.0007592681752527685, "loss": 0.0738, "theoretical_loss": 3.7487678242773406, "tokens_seen": 819462144 }, { "epoch": 0.25, "learning_rate": 0.000759187931311186, "loss": 0.0772, "theoretical_loss": 3.748650384081392, "tokens_seen": 819724288 }, { "epoch": 0.25, "learning_rate": 0.0007591076873696036, "loss": 0.0771, "theoretical_loss": 3.7485329919483448, "tokens_seen": 819986432 }, { "epoch": 0.25, "learning_rate": 0.0007590274434280212, "loss": 0.0746, "theoretical_loss": 3.7484156478431734, "tokens_seen": 820248576 }, { "epoch": 0.25, "learning_rate": 0.0007589471994864387, "loss": 0.075, "theoretical_loss": 3.748298351730888, "tokens_seen": 820510720 }, { "epoch": 0.25, "learning_rate": 0.0007588669555448564, "loss": 0.0757, "theoretical_loss": 3.748181103576537, "tokens_seen": 820772864 }, { "epoch": 0.25, "learning_rate": 0.0007587867116032739, "loss": 0.0773, "theoretical_loss": 3.7480639033452032, "tokens_seen": 821035008 }, { "epoch": 0.25, "learning_rate": 0.0007587064676616916, "loss": 0.078, "theoretical_loss": 3.747946751002009, "tokens_seen": 821297152 }, { "epoch": 0.25, "learning_rate": 0.0007586262237201092, "loss": 0.0784, "theoretical_loss": 3.747829646512109, "tokens_seen": 821559296 }, { "epoch": 0.25, "learning_rate": 0.0007585459797785267, "loss": 0.0763, "theoretical_loss": 3.747712589840699, "tokens_seen": 821821440 }, { "epoch": 0.25, "learning_rate": 0.0007584657358369444, "loss": 0.0751, "theoretical_loss": 3.7475955809530084, "tokens_seen": 822083584 }, { "epoch": 0.25, "learning_rate": 0.0007583854918953619, "loss": 0.0731, "theoretical_loss": 3.747478619814303, "tokens_seen": 822345728 }, { "epoch": 0.25, "learning_rate": 0.0007583052479537795, "loss": 0.0792, "theoretical_loss": 3.7473617063898863, "tokens_seen": 822607872 }, { "epoch": 0.25, "learning_rate": 0.0007582250040121971, "loss": 0.0767, "theoretical_loss": 3.747244840645097, "tokens_seen": 822870016 }, { "epoch": 0.25, "learning_rate": 0.0007581447600706147, "loss": 0.0751, "theoretical_loss": 3.7471280225453096, "tokens_seen": 823132160 }, { "epoch": 0.25, "learning_rate": 0.0007580645161290322, "loss": 0.0764, "theoretical_loss": 3.747011252055936, "tokens_seen": 823394304 }, { "epoch": 0.25, "learning_rate": 0.0007579842721874498, "loss": 0.075, "theoretical_loss": 3.746894529142424, "tokens_seen": 823656448 }, { "epoch": 0.25, "learning_rate": 0.0007579040282458675, "loss": 0.0762, "theoretical_loss": 3.746777853770256, "tokens_seen": 823918592 }, { "epoch": 0.25, "learning_rate": 0.000757823784304285, "loss": 0.0751, "theoretical_loss": 3.746661225904953, "tokens_seen": 824180736 }, { "epoch": 0.25, "learning_rate": 0.0007577435403627027, "loss": 0.0776, "theoretical_loss": 3.746544645512069, "tokens_seen": 824442880 }, { "epoch": 0.25, "learning_rate": 0.0007576632964211202, "loss": 0.0753, "theoretical_loss": 3.7464281125571963, "tokens_seen": 824705024 }, { "epoch": 0.25, "learning_rate": 0.0007575830524795379, "loss": 0.0762, "theoretical_loss": 3.7463116270059618, "tokens_seen": 824967168 }, { "epoch": 0.25, "learning_rate": 0.0007575028085379554, "loss": 0.0748, "theoretical_loss": 3.7461951888240286, "tokens_seen": 825229312 }, { "epoch": 0.25, "learning_rate": 0.000757422564596373, "loss": 0.0769, "theoretical_loss": 3.7460787979770958, "tokens_seen": 825491456 }, { "epoch": 0.25, "objective/train/advantage_avg": -6.741825927747414e-05, "objective/train/docs_used": 304950, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5155413150787354, "objective/train/original_loss": 1.5155411958694458, "objective/train/theoretical_loss": 3.745962454430897, "objective/train/tokens_used": 846213600, "objective/train/value_avg": -0.007843017578125, "objective/train/value_loss": 0.0001769889786373824, "objective/train/value_max": -0.00011682510375976562, "objective/train/value_min": -0.30322265625, "objective/train/value_reward_corr": 0.7321892783075874, "objective/train/value_std": 0.01239776611328125, "objective/train/weight_avg": 1.000016212463379, "objective/train/weighted_lm_loss": 1.5158941745758057, "objective/train/weights_max": 1.2777711153030396, "objective/train/weights_min": 0.37355300784111023, "theoretical_loss": 3.745962454430897, "tokens_seen": 825753600 }, { "epoch": 0.25, "learning_rate": 0.0007573423206547906, "loss": 0.0764, "theoretical_loss": 3.745962454430897, "tokens_seen": 825753600 }, { "epoch": 0.25, "learning_rate": 0.0007572620767132081, "loss": 0.0785, "theoretical_loss": 3.745846158151204, "tokens_seen": 826015744 }, { "epoch": 0.25, "learning_rate": 0.0007571818327716257, "loss": 0.0765, "theoretical_loss": 3.7457299091038214, "tokens_seen": 826277888 }, { "epoch": 0.25, "learning_rate": 0.0007571015888300434, "loss": 0.0748, "theoretical_loss": 3.745613707254591, "tokens_seen": 826540032 }, { "epoch": 0.25, "learning_rate": 0.000757021344888461, "loss": 0.0763, "theoretical_loss": 3.7454975525693897, "tokens_seen": 826802176 }, { "epoch": 0.25, "learning_rate": 0.0007569411009468785, "loss": 0.076, "theoretical_loss": 3.7453814450141305, "tokens_seen": 827064320 }, { "epoch": 0.25, "learning_rate": 0.0007568608570052962, "loss": 0.0806, "theoretical_loss": 3.7452653845547603, "tokens_seen": 827326464 }, { "epoch": 0.25, "learning_rate": 0.0007567806130637137, "loss": 0.0798, "theoretical_loss": 3.745149371157263, "tokens_seen": 827588608 }, { "epoch": 0.25, "learning_rate": 0.0007567003691221312, "loss": 0.0778, "theoretical_loss": 3.7450334047876574, "tokens_seen": 827850752 }, { "epoch": 0.25, "learning_rate": 0.0007566201251805489, "loss": 0.0779, "theoretical_loss": 3.744917485411997, "tokens_seen": 828112896 }, { "epoch": 0.25, "learning_rate": 0.0007565398812389664, "loss": 0.0803, "theoretical_loss": 3.744801612996371, "tokens_seen": 828375040 }, { "epoch": 0.25, "learning_rate": 0.000756459637297384, "loss": 0.0766, "theoretical_loss": 3.744685787506903, "tokens_seen": 828637184 }, { "epoch": 0.25, "learning_rate": 0.0007563793933558017, "loss": 0.0775, "theoretical_loss": 3.7445700089097533, "tokens_seen": 828899328 }, { "epoch": 0.25, "learning_rate": 0.0007562991494142193, "loss": 0.0764, "theoretical_loss": 3.7444542771711165, "tokens_seen": 829161472 }, { "epoch": 0.25, "learning_rate": 0.0007562189054726369, "loss": 0.0745, "theoretical_loss": 3.744338592257222, "tokens_seen": 829423616 }, { "epoch": 0.25, "learning_rate": 0.0007561386615310544, "loss": 0.076, "theoretical_loss": 3.744222954134334, "tokens_seen": 829685760 }, { "epoch": 0.25, "learning_rate": 0.000756058417589472, "loss": 0.0747, "theoretical_loss": 3.7441073627687524, "tokens_seen": 829947904 }, { "epoch": 0.25, "learning_rate": 0.0007559781736478896, "loss": 0.0769, "theoretical_loss": 3.743991818126812, "tokens_seen": 830210048 }, { "epoch": 0.25, "learning_rate": 0.0007558979297063072, "loss": 0.0761, "theoretical_loss": 3.7438763201748815, "tokens_seen": 830472192 }, { "epoch": 0.25, "learning_rate": 0.0007558176857647247, "loss": 0.0765, "theoretical_loss": 3.743760868879365, "tokens_seen": 830734336 }, { "epoch": 0.25, "learning_rate": 0.0007557374418231424, "loss": 0.0748, "theoretical_loss": 3.743645464206702, "tokens_seen": 830996480 }, { "epoch": 0.25, "learning_rate": 0.00075565719788156, "loss": 0.0757, "theoretical_loss": 3.743530106123365, "tokens_seen": 831258624 }, { "epoch": 0.25, "learning_rate": 0.0007555769539399775, "loss": 0.0756, "theoretical_loss": 3.7434147945958642, "tokens_seen": 831520768 }, { "epoch": 0.25, "learning_rate": 0.0007554967099983952, "loss": 0.0766, "theoretical_loss": 3.7432995295907405, "tokens_seen": 831782912 }, { "epoch": 0.25, "learning_rate": 0.0007554164660568127, "loss": 0.0781, "theoretical_loss": 3.7431843110745726, "tokens_seen": 832045056 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0015261891530826688, "objective/train/docs_used": 307306, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4608768224716187, "objective/train/original_loss": 1.4608770608901978, "objective/train/theoretical_loss": 3.743069139013972, "objective/train/tokens_used": 852767200, "objective/train/value_avg": -0.009246826171875, "objective/train/value_loss": 0.00019339752907399088, "objective/train/value_max": -0.00014889240264892578, "objective/train/value_min": -0.43701171875, "objective/train/value_reward_corr": 0.673779683354758, "objective/train/value_std": 0.0131378173828125, "objective/train/weight_avg": 1.0016170740127563, "objective/train/weighted_lm_loss": 1.4626826047897339, "objective/train/weights_max": 1.353039264678955, "objective/train/weights_min": 0.37851035594940186, "theoretical_loss": 3.743069139013972, "tokens_seen": 832307200 }, { "epoch": 0.25, "learning_rate": 0.0007553362221152303, "loss": 0.075, "theoretical_loss": 3.743069139013972, "tokens_seen": 832307200 }, { "epoch": 0.25, "learning_rate": 0.0007552559781736479, "loss": 0.076, "theoretical_loss": 3.742954013375586, "tokens_seen": 832569344 }, { "epoch": 0.25, "learning_rate": 0.0007551757342320655, "loss": 0.0768, "theoretical_loss": 3.742838934126094, "tokens_seen": 832831488 }, { "epoch": 0.25, "learning_rate": 0.000755095490290483, "loss": 0.0764, "theoretical_loss": 3.742723901232213, "tokens_seen": 833093632 }, { "epoch": 0.25, "learning_rate": 0.0007550152463489006, "loss": 0.0764, "theoretical_loss": 3.742608914660692, "tokens_seen": 833355776 }, { "epoch": 0.25, "learning_rate": 0.0007549350024073182, "loss": 0.078, "theoretical_loss": 3.742493974378314, "tokens_seen": 833617920 }, { "epoch": 0.25, "learning_rate": 0.0007548547584657359, "loss": 0.0751, "theoretical_loss": 3.742379080351899, "tokens_seen": 833880064 }, { "epoch": 0.25, "learning_rate": 0.0007547745145241535, "loss": 0.079, "theoretical_loss": 3.7422642325482975, "tokens_seen": 834142208 }, { "epoch": 0.25, "learning_rate": 0.000754694270582571, "loss": 0.0808, "theoretical_loss": 3.742149430934398, "tokens_seen": 834404352 }, { "epoch": 0.25, "learning_rate": 0.0007546140266409887, "loss": 0.0806, "theoretical_loss": 3.7420346754771208, "tokens_seen": 834666496 }, { "epoch": 0.25, "learning_rate": 0.0007545337826994062, "loss": 0.0763, "theoretical_loss": 3.7419199661434197, "tokens_seen": 834928640 }, { "epoch": 0.25, "learning_rate": 0.0007544535387578238, "loss": 0.078, "theoretical_loss": 3.7418053029002842, "tokens_seen": 835190784 }, { "epoch": 0.25, "learning_rate": 0.0007543732948162414, "loss": 0.0738, "theoretical_loss": 3.7416906857147367, "tokens_seen": 835452928 }, { "epoch": 0.25, "learning_rate": 0.0007542930508746589, "loss": 0.0758, "theoretical_loss": 3.741576114553835, "tokens_seen": 835715072 }, { "epoch": 0.25, "learning_rate": 0.0007542128069330765, "loss": 0.0775, "theoretical_loss": 3.7414615893846683, "tokens_seen": 835977216 }, { "epoch": 0.25, "learning_rate": 0.0007541325629914942, "loss": 0.0766, "theoretical_loss": 3.741347110174362, "tokens_seen": 836239360 }, { "epoch": 0.25, "learning_rate": 0.0007540523190499118, "loss": 0.0775, "theoretical_loss": 3.741232676890074, "tokens_seen": 836501504 }, { "epoch": 0.25, "learning_rate": 0.0007539720751083293, "loss": 0.0782, "theoretical_loss": 3.7411182894989965, "tokens_seen": 836763648 }, { "epoch": 0.25, "learning_rate": 0.000753891831166747, "loss": 0.0778, "theoretical_loss": 3.7410039479683546, "tokens_seen": 837025792 }, { "epoch": 0.25, "learning_rate": 0.0007538115872251645, "loss": 0.0758, "theoretical_loss": 3.740889652265408, "tokens_seen": 837287936 }, { "epoch": 0.25, "learning_rate": 0.0007537313432835821, "loss": 0.0772, "theoretical_loss": 3.7407754023574507, "tokens_seen": 837550080 }, { "epoch": 0.25, "learning_rate": 0.0007536510993419997, "loss": 0.0767, "theoretical_loss": 3.7406611982118076, "tokens_seen": 837812224 }, { "epoch": 0.25, "learning_rate": 0.0007535708554004172, "loss": 0.0768, "theoretical_loss": 3.74054703979584, "tokens_seen": 838074368 }, { "epoch": 0.25, "learning_rate": 0.000753490611458835, "loss": 0.0782, "theoretical_loss": 3.7404329270769403, "tokens_seen": 838336512 }, { "epoch": 0.25, "learning_rate": 0.0007534103675172525, "loss": 0.0758, "theoretical_loss": 3.740318860022537, "tokens_seen": 838598656 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0015410750638693571, "objective/train/docs_used": 309674, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.561414361000061, "objective/train/original_loss": 1.5614142417907715, "objective/train/theoretical_loss": 3.7402048386000892, "objective/train/tokens_used": 859320800, "objective/train/value_avg": -0.01053619384765625, "objective/train/value_loss": 0.0005841842503286898, "objective/train/value_max": -0.0002846717834472656, "objective/train/value_min": -0.48974609375, "objective/train/value_reward_corr": 0.5773049027811739, "objective/train/value_std": 0.016204833984375, "objective/train/weight_avg": 1.0017683506011963, "objective/train/weighted_lm_loss": 1.563459038734436, "objective/train/weights_max": 1.4500267505645752, "objective/train/weights_min": 0.05458702892065048, "theoretical_loss": 3.7402048386000892, "tokens_seen": 838860800 }, { "epoch": 0.25, "learning_rate": 0.0007533301235756701, "loss": 0.0754, "theoretical_loss": 3.7402048386000892, "tokens_seen": 838860800 }, { "epoch": 0.25, "learning_rate": 0.0007532498796340877, "loss": 0.0739, "theoretical_loss": 3.740090862777091, "tokens_seen": 839122944 }, { "epoch": 0.25, "learning_rate": 0.0007531696356925052, "loss": 0.0778, "theoretical_loss": 3.7399769325210697, "tokens_seen": 839385088 }, { "epoch": 0.25, "learning_rate": 0.0007530893917509228, "loss": 0.0771, "theoretical_loss": 3.7398630477995853, "tokens_seen": 839647232 }, { "epoch": 0.25, "learning_rate": 0.0007530091478093404, "loss": 0.0748, "theoretical_loss": 3.7397492085802315, "tokens_seen": 839909376 }, { "epoch": 0.25, "learning_rate": 0.000752928903867758, "loss": 0.0764, "theoretical_loss": 3.739635414830635, "tokens_seen": 840171520 }, { "epoch": 0.25, "learning_rate": 0.0007528486599261755, "loss": 0.076, "theoretical_loss": 3.7395216665184554, "tokens_seen": 840433664 }, { "epoch": 0.25, "learning_rate": 0.0007527684159845932, "loss": 0.0792, "theoretical_loss": 3.739407963611386, "tokens_seen": 840695808 }, { "epoch": 0.25, "learning_rate": 0.0007526881720430108, "loss": 0.074, "theoretical_loss": 3.739294306077152, "tokens_seen": 840957952 }, { "epoch": 0.25, "learning_rate": 0.0007526079281014283, "loss": 0.0785, "theoretical_loss": 3.7391806938835126, "tokens_seen": 841220096 }, { "epoch": 0.26, "learning_rate": 0.000752527684159846, "loss": 0.0755, "theoretical_loss": 3.7390671269982603, "tokens_seen": 841482240 }, { "epoch": 0.26, "learning_rate": 0.0007524474402182635, "loss": 0.0761, "theoretical_loss": 3.7389536053892187, "tokens_seen": 841744384 }, { "epoch": 0.26, "learning_rate": 0.0007523671962766812, "loss": 0.0741, "theoretical_loss": 3.738840129024246, "tokens_seen": 842006528 }, { "epoch": 0.26, "learning_rate": 0.0007522869523350987, "loss": 0.0771, "theoretical_loss": 3.738726697871233, "tokens_seen": 842268672 }, { "epoch": 0.26, "learning_rate": 0.0007522067083935163, "loss": 0.0795, "theoretical_loss": 3.738613311898103, "tokens_seen": 842530816 }, { "epoch": 0.26, "learning_rate": 0.0007521264644519339, "loss": 0.0762, "theoretical_loss": 3.7384999710728106, "tokens_seen": 842792960 }, { "epoch": 0.26, "learning_rate": 0.0007520462205103514, "loss": 0.0768, "theoretical_loss": 3.738386675363346, "tokens_seen": 843055104 }, { "epoch": 0.26, "learning_rate": 0.000751965976568769, "loss": 0.0763, "theoretical_loss": 3.738273424737729, "tokens_seen": 843317248 }, { "epoch": 0.26, "learning_rate": 0.0007518857326271867, "loss": 0.0792, "theoretical_loss": 3.7381602191640146, "tokens_seen": 843579392 }, { "epoch": 0.26, "learning_rate": 0.0007518054886856043, "loss": 0.0769, "theoretical_loss": 3.738047058610289, "tokens_seen": 843841536 }, { "epoch": 0.26, "learning_rate": 0.0007517252447440218, "loss": 0.0767, "theoretical_loss": 3.7379339430446707, "tokens_seen": 844103680 }, { "epoch": 0.26, "learning_rate": 0.0007516450008024395, "loss": 0.075, "theoretical_loss": 3.7378208724353117, "tokens_seen": 844365824 }, { "epoch": 0.26, "learning_rate": 0.000751564756860857, "loss": 0.0776, "theoretical_loss": 3.7377078467503955, "tokens_seen": 844627968 }, { "epoch": 0.26, "learning_rate": 0.0007514845129192746, "loss": 0.0779, "theoretical_loss": 3.737594865958138, "tokens_seen": 844890112 }, { "epoch": 0.26, "learning_rate": 0.0007514042689776922, "loss": 0.0759, "theoretical_loss": 3.7374819300267883, "tokens_seen": 845152256 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0018314362969249487, "objective/train/docs_used": 312167, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4999068975448608, "objective/train/original_loss": 1.4999067783355713, "objective/train/theoretical_loss": 3.7373690389246272, "objective/train/tokens_used": 865874400, "objective/train/value_avg": -0.00806427001953125, "objective/train/value_loss": 0.00013757370470557362, "objective/train/value_max": -0.0001323223114013672, "objective/train/value_min": -0.31982421875, "objective/train/value_reward_corr": 0.6737398459252089, "objective/train/value_std": 0.01104736328125, "objective/train/weight_avg": 1.0018991231918335, "objective/train/weighted_lm_loss": 1.5027458667755127, "objective/train/weights_max": 1.1910135746002197, "objective/train/weights_min": 0.6367841958999634, "theoretical_loss": 3.7373690389246272, "tokens_seen": 845414400 }, { "epoch": 0.26, "learning_rate": 0.0007513240250361097, "loss": 0.0768, "theoretical_loss": 3.7373690389246272, "tokens_seen": 845414400 }, { "epoch": 0.26, "learning_rate": 0.0007512437810945275, "loss": 0.0783, "theoretical_loss": 3.737256192619967, "tokens_seen": 845676544 }, { "epoch": 0.26, "learning_rate": 0.000751163537152945, "loss": 0.0751, "theoretical_loss": 3.737143391081154, "tokens_seen": 845938688 }, { "epoch": 0.26, "learning_rate": 0.0007510832932113626, "loss": 0.076, "theoretical_loss": 3.7370306342765653, "tokens_seen": 846200832 }, { "epoch": 0.26, "learning_rate": 0.0007510030492697802, "loss": 0.0736, "theoretical_loss": 3.73691792217461, "tokens_seen": 846462976 }, { "epoch": 0.26, "learning_rate": 0.0007509228053281978, "loss": 0.0778, "theoretical_loss": 3.7368052547437305, "tokens_seen": 846725120 }, { "epoch": 0.26, "learning_rate": 0.0007508425613866153, "loss": 0.077, "theoretical_loss": 3.7366926319524003, "tokens_seen": 846987264 }, { "epoch": 0.26, "learning_rate": 0.0007507623174450329, "loss": 0.0742, "theoretical_loss": 3.736580053769125, "tokens_seen": 847249408 }, { "epoch": 0.26, "learning_rate": 0.0007506820735034505, "loss": 0.0727, "theoretical_loss": 3.736467520162442, "tokens_seen": 847511552 }, { "epoch": 0.26, "learning_rate": 0.000750601829561868, "loss": 0.0769, "theoretical_loss": 3.736355031100922, "tokens_seen": 847773696 }, { "epoch": 0.26, "learning_rate": 0.0007505215856202858, "loss": 0.0763, "theoretical_loss": 3.7362425865531654, "tokens_seen": 848035840 }, { "epoch": 0.26, "learning_rate": 0.0007504413416787033, "loss": 0.0773, "theoretical_loss": 3.736130186487806, "tokens_seen": 848297984 }, { "epoch": 0.26, "learning_rate": 0.0007503610977371209, "loss": 0.0748, "theoretical_loss": 3.736017830873508, "tokens_seen": 848560128 }, { "epoch": 0.26, "learning_rate": 0.0007502808537955385, "loss": 0.0769, "theoretical_loss": 3.7359055196789694, "tokens_seen": 848822272 }, { "epoch": 0.26, "learning_rate": 0.000750200609853956, "loss": 0.0747, "theoretical_loss": 3.7357932528729183, "tokens_seen": 849084416 }, { "epoch": 0.26, "learning_rate": 0.0007501203659123736, "loss": 0.0785, "theoretical_loss": 3.7356810304241144, "tokens_seen": 849346560 }, { "epoch": 0.26, "learning_rate": 0.0007500401219707912, "loss": 0.0781, "theoretical_loss": 3.73556885230135, "tokens_seen": 849608704 }, { "epoch": 0.26, "learning_rate": 0.0007499598780292088, "loss": 0.0755, "theoretical_loss": 3.735456718473449, "tokens_seen": 849870848 }, { "epoch": 0.26, "learning_rate": 0.0007498796340876264, "loss": 0.0716, "theoretical_loss": 3.7353446289092647, "tokens_seen": 850132992 }, { "epoch": 0.26, "learning_rate": 0.000749799390146044, "loss": 0.0779, "theoretical_loss": 3.7352325835776856, "tokens_seen": 850395136 }, { "epoch": 0.26, "learning_rate": 0.0007497191462044616, "loss": 0.0772, "theoretical_loss": 3.7351205824476277, "tokens_seen": 850657280 }, { "epoch": 0.26, "learning_rate": 0.0007496389022628792, "loss": 0.079, "theoretical_loss": 3.7350086254880415, "tokens_seen": 850919424 }, { "epoch": 0.26, "learning_rate": 0.0007495586583212968, "loss": 0.0742, "theoretical_loss": 3.734896712667907, "tokens_seen": 851181568 }, { "epoch": 0.26, "learning_rate": 0.0007494784143797143, "loss": 0.0753, "theoretical_loss": 3.734784843956236, "tokens_seen": 851443712 }, { "epoch": 0.26, "learning_rate": 0.000749398170438132, "loss": 0.0745, "theoretical_loss": 3.7346730193220727, "tokens_seen": 851705856 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0010687694884836674, "objective/train/docs_used": 314587, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.526440143585205, "objective/train/original_loss": 1.526440143585205, "objective/train/theoretical_loss": 3.7345612387344906, "objective/train/tokens_used": 872428000, "objective/train/value_avg": -0.00872802734375, "objective/train/value_loss": 0.0003840262070298195, "objective/train/value_max": -0.0002779960632324219, "objective/train/value_min": -0.62109375, "objective/train/value_reward_corr": 0.6480027416305001, "objective/train/value_std": 0.016021728515625, "objective/train/weight_avg": 1.0012387037277222, "objective/train/weighted_lm_loss": 1.527116060256958, "objective/train/weights_max": 1.4146990776062012, "objective/train/weights_min": 0.370330810546875, "theoretical_loss": 3.7345612387344906, "tokens_seen": 851968000 }, { "epoch": 0.26, "learning_rate": 0.0007493179264965495, "loss": 0.0755, "theoretical_loss": 3.7345612387344906, "tokens_seen": 851968000 }, { "epoch": 0.26, "learning_rate": 0.0007492376825549671, "loss": 0.078, "theoretical_loss": 3.734449502162596, "tokens_seen": 852230144 }, { "epoch": 0.26, "learning_rate": 0.0007491574386133847, "loss": 0.0772, "theoretical_loss": 3.7343378095755257, "tokens_seen": 852492288 }, { "epoch": 0.26, "learning_rate": 0.0007490771946718022, "loss": 0.0743, "theoretical_loss": 3.7342261609424483, "tokens_seen": 852754432 }, { "epoch": 0.26, "learning_rate": 0.0007489969507302198, "loss": 0.0787, "theoretical_loss": 3.7341145562325613, "tokens_seen": 853016576 }, { "epoch": 0.26, "learning_rate": 0.0007489167067886375, "loss": 0.0761, "theoretical_loss": 3.734002995415096, "tokens_seen": 853278720 }, { "epoch": 0.26, "learning_rate": 0.0007488364628470551, "loss": 0.0771, "theoretical_loss": 3.7338914784593134, "tokens_seen": 853540864 }, { "epoch": 0.26, "learning_rate": 0.0007487562189054726, "loss": 0.0754, "theoretical_loss": 3.733780005334505, "tokens_seen": 853803008 }, { "epoch": 0.26, "learning_rate": 0.0007486759749638903, "loss": 0.0786, "theoretical_loss": 3.733668576009995, "tokens_seen": 854065152 }, { "epoch": 0.26, "learning_rate": 0.0007485957310223078, "loss": 0.0764, "theoretical_loss": 3.733557190455136, "tokens_seen": 854327296 }, { "epoch": 0.26, "learning_rate": 0.0007485154870807254, "loss": 0.074, "theoretical_loss": 3.733445848639313, "tokens_seen": 854589440 }, { "epoch": 0.26, "learning_rate": 0.000748435243139143, "loss": 0.0753, "theoretical_loss": 3.733334550531942, "tokens_seen": 854851584 }, { "epoch": 0.26, "learning_rate": 0.0007483549991975605, "loss": 0.0776, "theoretical_loss": 3.7332232961024694, "tokens_seen": 855113728 }, { "epoch": 0.26, "learning_rate": 0.0007482747552559783, "loss": 0.0758, "theoretical_loss": 3.7331120853203714, "tokens_seen": 855375872 }, { "epoch": 0.26, "learning_rate": 0.0007481945113143958, "loss": 0.0749, "theoretical_loss": 3.733000918155156, "tokens_seen": 855638016 }, { "epoch": 0.26, "learning_rate": 0.0007481142673728134, "loss": 0.0725, "theoretical_loss": 3.7328897945763617, "tokens_seen": 855900160 }, { "epoch": 0.26, "learning_rate": 0.000748034023431231, "loss": 0.078, "theoretical_loss": 3.7327787145535574, "tokens_seen": 856162304 }, { "epoch": 0.26, "learning_rate": 0.0007479537794896486, "loss": 0.0736, "theoretical_loss": 3.732667678056342, "tokens_seen": 856424448 }, { "epoch": 0.26, "learning_rate": 0.0007478735355480661, "loss": 0.0729, "theoretical_loss": 3.732556685054346, "tokens_seen": 856686592 }, { "epoch": 0.26, "learning_rate": 0.0007477932916064837, "loss": 0.0744, "theoretical_loss": 3.7324457355172296, "tokens_seen": 856948736 }, { "epoch": 0.26, "learning_rate": 0.0007477130476649013, "loss": 0.0746, "theoretical_loss": 3.7323348294146843, "tokens_seen": 857210880 }, { "epoch": 0.26, "learning_rate": 0.0007476328037233188, "loss": 0.0757, "theoretical_loss": 3.73222396671643, "tokens_seen": 857473024 }, { "epoch": 0.26, "learning_rate": 0.0007475525597817365, "loss": 0.0766, "theoretical_loss": 3.73211314739222, "tokens_seen": 857735168 }, { "epoch": 0.26, "learning_rate": 0.000747472315840154, "loss": 0.0748, "theoretical_loss": 3.732002371411835, "tokens_seen": 857997312 }, { "epoch": 0.26, "learning_rate": 0.0007473920718985718, "loss": 0.0738, "theoretical_loss": 3.7318916387450876, "tokens_seen": 858259456 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0009312600013799965, "objective/train/docs_used": 317142, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5528024435043335, "objective/train/original_loss": 1.5528024435043335, "objective/train/theoretical_loss": 3.7317809493618204, "objective/train/tokens_used": 878981600, "objective/train/value_avg": -0.00759124755859375, "objective/train/value_loss": 0.0003197310143150389, "objective/train/value_max": -0.00022339820861816406, "objective/train/value_min": -0.65625, "objective/train/value_reward_corr": 0.5907966960002585, "objective/train/value_std": 0.01306915283203125, "objective/train/weight_avg": 1.0010712146759033, "objective/train/weighted_lm_loss": 1.5542237758636475, "objective/train/weights_max": 1.8819797039031982, "objective/train/weights_min": 0.3681640923023224, "theoretical_loss": 3.7317809493618204, "tokens_seen": 858521600 }, { "epoch": 0.26, "learning_rate": 0.0007473118279569893, "loss": 0.0775, "theoretical_loss": 3.7317809493618204, "tokens_seen": 858521600 }, { "epoch": 0.26, "learning_rate": 0.0007472315840154068, "loss": 0.0751, "theoretical_loss": 3.7316703032319056, "tokens_seen": 858783744 }, { "epoch": 0.26, "learning_rate": 0.0007471513400738245, "loss": 0.0786, "theoretical_loss": 3.7315597003252474, "tokens_seen": 859045888 }, { "epoch": 0.26, "learning_rate": 0.000747071096132242, "loss": 0.0784, "theoretical_loss": 3.731449140611777, "tokens_seen": 859308032 }, { "epoch": 0.26, "learning_rate": 0.0007469908521906596, "loss": 0.0748, "theoretical_loss": 3.7313386240614577, "tokens_seen": 859570176 }, { "epoch": 0.26, "learning_rate": 0.0007469106082490772, "loss": 0.0761, "theoretical_loss": 3.7312281506442835, "tokens_seen": 859832320 }, { "epoch": 0.26, "learning_rate": 0.0007468303643074948, "loss": 0.0757, "theoretical_loss": 3.7311177203302766, "tokens_seen": 860094464 }, { "epoch": 0.26, "learning_rate": 0.0007467501203659124, "loss": 0.0766, "theoretical_loss": 3.7310073330894906, "tokens_seen": 860356608 }, { "epoch": 0.26, "learning_rate": 0.00074666987642433, "loss": 0.0739, "theoretical_loss": 3.730896988892008, "tokens_seen": 860618752 }, { "epoch": 0.26, "learning_rate": 0.0007465896324827476, "loss": 0.0808, "theoretical_loss": 3.7307866877079414, "tokens_seen": 860880896 }, { "epoch": 0.26, "learning_rate": 0.0007465093885411651, "loss": 0.0756, "theoretical_loss": 3.730676429507435, "tokens_seen": 861143040 }, { "epoch": 0.26, "learning_rate": 0.0007464291445995828, "loss": 0.0736, "theoretical_loss": 3.730566214260659, "tokens_seen": 861405184 }, { "epoch": 0.26, "learning_rate": 0.0007463489006580003, "loss": 0.0759, "theoretical_loss": 3.730456041937817, "tokens_seen": 861667328 }, { "epoch": 0.26, "learning_rate": 0.0007462686567164179, "loss": 0.0766, "theoretical_loss": 3.730345912509141, "tokens_seen": 861929472 }, { "epoch": 0.26, "learning_rate": 0.0007461884127748355, "loss": 0.074, "theoretical_loss": 3.7302358259448924, "tokens_seen": 862191616 }, { "epoch": 0.26, "learning_rate": 0.000746108168833253, "loss": 0.0776, "theoretical_loss": 3.730125782215362, "tokens_seen": 862453760 }, { "epoch": 0.26, "learning_rate": 0.0007460279248916708, "loss": 0.0766, "theoretical_loss": 3.730015781290872, "tokens_seen": 862715904 }, { "epoch": 0.26, "learning_rate": 0.0007459476809500883, "loss": 0.0783, "theoretical_loss": 3.729905823141771, "tokens_seen": 862978048 }, { "epoch": 0.26, "learning_rate": 0.0007458674370085059, "loss": 0.0739, "theoretical_loss": 3.729795907738441, "tokens_seen": 863240192 }, { "epoch": 0.26, "learning_rate": 0.0007457871930669235, "loss": 0.0746, "theoretical_loss": 3.729686035051291, "tokens_seen": 863502336 }, { "epoch": 0.26, "learning_rate": 0.0007457069491253411, "loss": 0.0761, "theoretical_loss": 3.7295762050507593, "tokens_seen": 863764480 }, { "epoch": 0.26, "learning_rate": 0.0007456267051837586, "loss": 0.0754, "theoretical_loss": 3.7294664177073145, "tokens_seen": 864026624 }, { "epoch": 0.26, "learning_rate": 0.0007455464612421762, "loss": 0.0773, "theoretical_loss": 3.7293566729914547, "tokens_seen": 864288768 }, { "epoch": 0.26, "learning_rate": 0.0007454662173005938, "loss": 0.0756, "theoretical_loss": 3.7292469708737066, "tokens_seen": 864550912 }, { "epoch": 0.26, "learning_rate": 0.0007453859733590113, "loss": 0.0764, "theoretical_loss": 3.729137311324627, "tokens_seen": 864813056 }, { "epoch": 0.26, "objective/train/advantage_avg": -0.00015239574713632464, "objective/train/docs_used": 319421, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4767508506774902, "objective/train/original_loss": 1.4767508506774902, "objective/train/theoretical_loss": 3.7290276943148015, "objective/train/tokens_used": 885535200, "objective/train/value_avg": -0.00782012939453125, "objective/train/value_loss": 0.0005899901734665036, "objective/train/value_max": -0.00014889240264892578, "objective/train/value_min": -0.95166015625, "objective/train/value_reward_corr": 0.5327929289583835, "objective/train/value_std": 0.0201263427734375, "objective/train/weight_avg": 1.0001168251037598, "objective/train/weighted_lm_loss": 1.4773733615875244, "objective/train/weights_max": 2.5354816913604736, "objective/train/weights_min": 0.23555737733840942, "theoretical_loss": 3.7290276943148015, "tokens_seen": 865075200 }, { "epoch": 0.26, "learning_rate": 0.000745305729417429, "loss": 0.073, "theoretical_loss": 3.7290276943148015, "tokens_seen": 865075200 }, { "epoch": 0.26, "learning_rate": 0.0007452254854758466, "loss": 0.0777, "theoretical_loss": 3.7289181198148458, "tokens_seen": 865337344 }, { "epoch": 0.26, "learning_rate": 0.0007451452415342642, "loss": 0.0771, "theoretical_loss": 3.7288085877954025, "tokens_seen": 865599488 }, { "epoch": 0.26, "learning_rate": 0.0007450649975926818, "loss": 0.0765, "theoretical_loss": 3.728699098227146, "tokens_seen": 865861632 }, { "epoch": 0.26, "learning_rate": 0.0007449847536510994, "loss": 0.0757, "theoretical_loss": 3.728589651080779, "tokens_seen": 866123776 }, { "epoch": 0.26, "learning_rate": 0.000744904509709517, "loss": 0.0753, "theoretical_loss": 3.728480246327032, "tokens_seen": 866385920 }, { "epoch": 0.26, "learning_rate": 0.0007448242657679345, "loss": 0.0744, "theoretical_loss": 3.7283708839366656, "tokens_seen": 866648064 }, { "epoch": 0.26, "learning_rate": 0.0007447440218263521, "loss": 0.0766, "theoretical_loss": 3.72826156388047, "tokens_seen": 866910208 }, { "epoch": 0.26, "learning_rate": 0.0007446637778847697, "loss": 0.0762, "theoretical_loss": 3.728152286129263, "tokens_seen": 867172352 }, { "epoch": 0.26, "learning_rate": 0.0007445835339431873, "loss": 0.0744, "theoretical_loss": 3.728043050653893, "tokens_seen": 867434496 }, { "epoch": 0.26, "learning_rate": 0.0007445032900016049, "loss": 0.0797, "theoretical_loss": 3.7279338574252354, "tokens_seen": 867696640 }, { "epoch": 0.26, "learning_rate": 0.0007444230460600226, "loss": 0.0779, "theoretical_loss": 3.7278247064141956, "tokens_seen": 867958784 }, { "epoch": 0.26, "learning_rate": 0.0007443428021184401, "loss": 0.0771, "theoretical_loss": 3.7277155975917076, "tokens_seen": 868220928 }, { "epoch": 0.26, "learning_rate": 0.0007442625581768576, "loss": 0.0779, "theoretical_loss": 3.7276065309287345, "tokens_seen": 868483072 }, { "epoch": 0.26, "learning_rate": 0.0007441823142352753, "loss": 0.0774, "theoretical_loss": 3.727497506396267, "tokens_seen": 868745216 }, { "epoch": 0.26, "learning_rate": 0.0007441020702936928, "loss": 0.075, "theoretical_loss": 3.7273885239653266, "tokens_seen": 869007360 }, { "epoch": 0.26, "learning_rate": 0.0007440218263521104, "loss": 0.0752, "theoretical_loss": 3.727279583606961, "tokens_seen": 869269504 }, { "epoch": 0.26, "learning_rate": 0.000743941582410528, "loss": 0.0763, "theoretical_loss": 3.727170685292248, "tokens_seen": 869531648 }, { "epoch": 0.26, "learning_rate": 0.0007438613384689456, "loss": 0.0769, "theoretical_loss": 3.7270618289922943, "tokens_seen": 869793792 }, { "epoch": 0.26, "learning_rate": 0.0007437810945273631, "loss": 0.0752, "theoretical_loss": 3.7269530146782337, "tokens_seen": 870055936 }, { "epoch": 0.26, "learning_rate": 0.0007437008505857808, "loss": 0.0808, "theoretical_loss": 3.72684424232123, "tokens_seen": 870318080 }, { "epoch": 0.26, "learning_rate": 0.0007436206066441984, "loss": 0.077, "theoretical_loss": 3.7267355118924748, "tokens_seen": 870580224 }, { "epoch": 0.26, "learning_rate": 0.000743540362702616, "loss": 0.0752, "theoretical_loss": 3.726626823363188, "tokens_seen": 870842368 }, { "epoch": 0.26, "learning_rate": 0.0007434601187610336, "loss": 0.0754, "theoretical_loss": 3.7265181767046176, "tokens_seen": 871104512 }, { "epoch": 0.26, "learning_rate": 0.0007433798748194511, "loss": 0.0772, "theoretical_loss": 3.726409571888042, "tokens_seen": 871366656 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0006801082054153085, "objective/train/docs_used": 321822, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6108815670013428, "objective/train/original_loss": 1.6108815670013428, "objective/train/theoretical_loss": 3.7263010088847652, "objective/train/tokens_used": 892088800, "objective/train/value_avg": -0.00757598876953125, "objective/train/value_loss": 0.00016946658433880657, "objective/train/value_max": -0.00016224384307861328, "objective/train/value_min": -0.28515625, "objective/train/value_reward_corr": 0.7011885448576807, "objective/train/value_std": 0.01305389404296875, "objective/train/weight_avg": 1.0007609128952026, "objective/train/weighted_lm_loss": 1.6116539239883423, "objective/train/weights_max": 1.1948398351669312, "objective/train/weights_min": 0.39612501859664917, "theoretical_loss": 3.7263010088847652, "tokens_seen": 871628800 }, { "epoch": 0.26, "learning_rate": 0.0007432996308778688, "loss": 0.0783, "theoretical_loss": 3.7263010088847652, "tokens_seen": 871628800 }, { "epoch": 0.26, "learning_rate": 0.0007432193869362863, "loss": 0.0751, "theoretical_loss": 3.726192487666121, "tokens_seen": 871890944 }, { "epoch": 0.26, "learning_rate": 0.0007431391429947038, "loss": 0.0762, "theoretical_loss": 3.7260840082034714, "tokens_seen": 872153088 }, { "epoch": 0.26, "learning_rate": 0.0007430588990531216, "loss": 0.0758, "theoretical_loss": 3.7259755704682065, "tokens_seen": 872415232 }, { "epoch": 0.26, "learning_rate": 0.0007429786551115391, "loss": 0.0765, "theoretical_loss": 3.7258671744317446, "tokens_seen": 872677376 }, { "epoch": 0.26, "learning_rate": 0.0007428984111699567, "loss": 0.0767, "theoretical_loss": 3.725758820065531, "tokens_seen": 872939520 }, { "epoch": 0.26, "learning_rate": 0.0007428181672283743, "loss": 0.0759, "theoretical_loss": 3.725650507341042, "tokens_seen": 873201664 }, { "epoch": 0.26, "learning_rate": 0.0007427379232867919, "loss": 0.0773, "theoretical_loss": 3.7255422362297788, "tokens_seen": 873463808 }, { "epoch": 0.26, "learning_rate": 0.0007426576793452094, "loss": 0.0758, "theoretical_loss": 3.7254340067032725, "tokens_seen": 873725952 }, { "epoch": 0.26, "learning_rate": 0.000742577435403627, "loss": 0.0765, "theoretical_loss": 3.7253258187330816, "tokens_seen": 873988096 }, { "epoch": 0.26, "learning_rate": 0.0007424971914620446, "loss": 0.0761, "theoretical_loss": 3.7252176722907926, "tokens_seen": 874250240 }, { "epoch": 0.27, "learning_rate": 0.0007424169475204621, "loss": 0.0786, "theoretical_loss": 3.725109567348021, "tokens_seen": 874512384 }, { "epoch": 0.27, "learning_rate": 0.0007423367035788799, "loss": 0.0777, "theoretical_loss": 3.725001503876408, "tokens_seen": 874774528 }, { "epoch": 0.27, "learning_rate": 0.0007422564596372974, "loss": 0.0753, "theoretical_loss": 3.7248934818476247, "tokens_seen": 875036672 }, { "epoch": 0.27, "learning_rate": 0.0007421762156957151, "loss": 0.0756, "theoretical_loss": 3.7247855012333693, "tokens_seen": 875298816 }, { "epoch": 0.27, "learning_rate": 0.0007420959717541326, "loss": 0.0758, "theoretical_loss": 3.7246775620053665, "tokens_seen": 875560960 }, { "epoch": 0.27, "learning_rate": 0.0007420157278125502, "loss": 0.0766, "theoretical_loss": 3.724569664135372, "tokens_seen": 875823104 }, { "epoch": 0.27, "learning_rate": 0.0007419354838709678, "loss": 0.0761, "theoretical_loss": 3.7244618075951657, "tokens_seen": 876085248 }, { "epoch": 0.27, "learning_rate": 0.0007418552399293853, "loss": 0.0779, "theoretical_loss": 3.7243539923565576, "tokens_seen": 876347392 }, { "epoch": 0.27, "learning_rate": 0.0007417749959878029, "loss": 0.0773, "theoretical_loss": 3.724246218391384, "tokens_seen": 876609536 }, { "epoch": 0.27, "learning_rate": 0.0007416947520462205, "loss": 0.0779, "theoretical_loss": 3.7241384856715096, "tokens_seen": 876871680 }, { "epoch": 0.27, "learning_rate": 0.0007416145081046381, "loss": 0.0754, "theoretical_loss": 3.724030794168826, "tokens_seen": 877133824 }, { "epoch": 0.27, "learning_rate": 0.0007415342641630557, "loss": 0.0752, "theoretical_loss": 3.723923143855253, "tokens_seen": 877395968 }, { "epoch": 0.27, "learning_rate": 0.0007414540202214734, "loss": 0.0744, "theoretical_loss": 3.723815534702738, "tokens_seen": 877658112 }, { "epoch": 0.27, "learning_rate": 0.0007413737762798909, "loss": 0.0784, "theoretical_loss": 3.7237079666832553, "tokens_seen": 877920256 }, { "epoch": 0.27, "objective/train/advantage_avg": 9.239621431333944e-05, "objective/train/docs_used": 324331, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5191644430160522, "objective/train/original_loss": 1.5191644430160522, "objective/train/theoretical_loss": 3.7236004397688065, "objective/train/tokens_used": 898642400, "objective/train/value_avg": -0.01021575927734375, "objective/train/value_loss": 0.0004930080031044781, "objective/train/value_max": -0.0001596212387084961, "objective/train/value_min": -0.9365234375, "objective/train/value_reward_corr": 0.6092888427252324, "objective/train/value_std": 0.017242431640625, "objective/train/weight_avg": 1.0003056526184082, "objective/train/weighted_lm_loss": 1.51966392993927, "objective/train/weights_max": 1.5377529859542847, "objective/train/weights_min": 0.36856982111930847, "theoretical_loss": 3.7236004397688065, "tokens_seen": 878182400 }, { "epoch": 0.27, "learning_rate": 0.0007412935323383084, "loss": 0.0779, "theoretical_loss": 3.7236004397688065, "tokens_seen": 878182400 }, { "epoch": 0.27, "learning_rate": 0.0007412132883967261, "loss": 0.0754, "theoretical_loss": 3.723492953931421, "tokens_seen": 878444544 }, { "epoch": 0.27, "learning_rate": 0.0007411330444551436, "loss": 0.0758, "theoretical_loss": 3.7233855091431565, "tokens_seen": 878706688 }, { "epoch": 0.27, "learning_rate": 0.0007410528005135613, "loss": 0.0759, "theoretical_loss": 3.723278105376096, "tokens_seen": 878968832 }, { "epoch": 0.27, "learning_rate": 0.0007409725565719788, "loss": 0.0747, "theoretical_loss": 3.723170742602351, "tokens_seen": 879230976 }, { "epoch": 0.27, "learning_rate": 0.0007408923126303964, "loss": 0.0772, "theoretical_loss": 3.7230634207940607, "tokens_seen": 879493120 }, { "epoch": 0.27, "learning_rate": 0.0007408120686888141, "loss": 0.0771, "theoretical_loss": 3.7229561399233906, "tokens_seen": 879755264 }, { "epoch": 0.27, "learning_rate": 0.0007407318247472316, "loss": 0.0789, "theoretical_loss": 3.7228488999625338, "tokens_seen": 880017408 }, { "epoch": 0.27, "learning_rate": 0.0007406515808056492, "loss": 0.0773, "theoretical_loss": 3.722741700883711, "tokens_seen": 880279552 }, { "epoch": 0.27, "learning_rate": 0.0007405713368640668, "loss": 0.0755, "theoretical_loss": 3.7226345426591694, "tokens_seen": 880541696 }, { "epoch": 0.27, "learning_rate": 0.0007404910929224844, "loss": 0.073, "theoretical_loss": 3.722527425261183, "tokens_seen": 880803840 }, { "epoch": 0.27, "learning_rate": 0.0007404108489809019, "loss": 0.0782, "theoretical_loss": 3.7224203486620535, "tokens_seen": 881065984 }, { "epoch": 0.27, "learning_rate": 0.0007403306050393196, "loss": 0.0775, "theoretical_loss": 3.7223133128341104, "tokens_seen": 881328128 }, { "epoch": 0.27, "learning_rate": 0.0007402503610977371, "loss": 0.0751, "theoretical_loss": 3.722206317749708, "tokens_seen": 881590272 }, { "epoch": 0.27, "learning_rate": 0.0007401701171561546, "loss": 0.0777, "theoretical_loss": 3.722099363381229, "tokens_seen": 881852416 }, { "epoch": 0.27, "learning_rate": 0.0007400898732145724, "loss": 0.0767, "theoretical_loss": 3.7219924497010837, "tokens_seen": 882114560 }, { "epoch": 0.27, "learning_rate": 0.0007400096292729899, "loss": 0.0767, "theoretical_loss": 3.721885576681708, "tokens_seen": 882376704 }, { "epoch": 0.27, "learning_rate": 0.0007399293853314075, "loss": 0.0778, "theoretical_loss": 3.7217787442955643, "tokens_seen": 882638848 }, { "epoch": 0.27, "learning_rate": 0.0007398491413898251, "loss": 0.0765, "theoretical_loss": 3.721671952515144, "tokens_seen": 882900992 }, { "epoch": 0.27, "learning_rate": 0.0007397688974482427, "loss": 0.0761, "theoretical_loss": 3.7215652013129628, "tokens_seen": 883163136 }, { "epoch": 0.27, "learning_rate": 0.0007396886535066603, "loss": 0.0756, "theoretical_loss": 3.7214584906615644, "tokens_seen": 883425280 }, { "epoch": 0.27, "learning_rate": 0.0007396084095650778, "loss": 0.0732, "theoretical_loss": 3.7213518205335196, "tokens_seen": 883687424 }, { "epoch": 0.27, "learning_rate": 0.0007395281656234954, "loss": 0.0764, "theoretical_loss": 3.721245190901425, "tokens_seen": 883949568 }, { "epoch": 0.27, "learning_rate": 0.000739447921681913, "loss": 0.0768, "theoretical_loss": 3.721138601737904, "tokens_seen": 884211712 }, { "epoch": 0.27, "learning_rate": 0.0007393676777403307, "loss": 0.0753, "theoretical_loss": 3.721032053015607, "tokens_seen": 884473856 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0008710287511348724, "objective/train/docs_used": 326745, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.574365496635437, "objective/train/original_loss": 1.5743653774261475, "objective/train/theoretical_loss": 3.720925544707211, "objective/train/tokens_used": 905196000, "objective/train/value_avg": -0.007781982421875, "objective/train/value_loss": 0.00020630114886444062, "objective/train/value_max": -0.00011235475540161133, "objective/train/value_min": -0.50830078125, "objective/train/value_reward_corr": 0.6715271500942702, "objective/train/value_std": 0.013824462890625, "objective/train/weight_avg": 1.0009685754776, "objective/train/weighted_lm_loss": 1.576448917388916, "objective/train/weights_max": 1.275231957435608, "objective/train/weights_min": 0.3771900534629822, "theoretical_loss": 3.720925544707211, "tokens_seen": 884736000 }, { "epoch": 0.27, "learning_rate": 0.0007392874337987482, "loss": 0.0758, "theoretical_loss": 3.720925544707211, "tokens_seen": 884736000 }, { "epoch": 0.27, "learning_rate": 0.0007392071898571659, "loss": 0.0778, "theoretical_loss": 3.720819076785419, "tokens_seen": 884998144 }, { "epoch": 0.27, "learning_rate": 0.0007391269459155834, "loss": 0.074, "theoretical_loss": 3.720712649222961, "tokens_seen": 885260288 }, { "epoch": 0.27, "learning_rate": 0.0007390467019740009, "loss": 0.075, "theoretical_loss": 3.720606261992593, "tokens_seen": 885522432 }, { "epoch": 0.27, "learning_rate": 0.0007389664580324186, "loss": 0.0775, "theoretical_loss": 3.7204999150670988, "tokens_seen": 885784576 }, { "epoch": 0.27, "learning_rate": 0.0007388862140908361, "loss": 0.0753, "theoretical_loss": 3.7203936084192866, "tokens_seen": 886046720 }, { "epoch": 0.27, "learning_rate": 0.0007388059701492537, "loss": 0.0772, "theoretical_loss": 3.720287342021992, "tokens_seen": 886308864 }, { "epoch": 0.27, "learning_rate": 0.0007387257262076713, "loss": 0.0745, "theoretical_loss": 3.720181115848078, "tokens_seen": 886571008 }, { "epoch": 0.27, "learning_rate": 0.000738645482266089, "loss": 0.074, "theoretical_loss": 3.7200749298704316, "tokens_seen": 886833152 }, { "epoch": 0.27, "learning_rate": 0.0007385652383245065, "loss": 0.0778, "theoretical_loss": 3.7199687840619675, "tokens_seen": 887095296 }, { "epoch": 0.27, "learning_rate": 0.0007384849943829242, "loss": 0.0766, "theoretical_loss": 3.719862678395627, "tokens_seen": 887357440 }, { "epoch": 0.27, "learning_rate": 0.0007384047504413417, "loss": 0.0755, "theoretical_loss": 3.719756612844377, "tokens_seen": 887619584 }, { "epoch": 0.27, "learning_rate": 0.0007383245064997593, "loss": 0.0737, "theoretical_loss": 3.7196505873812105, "tokens_seen": 887881728 }, { "epoch": 0.27, "learning_rate": 0.0007382442625581769, "loss": 0.0764, "theoretical_loss": 3.7195446019791465, "tokens_seen": 888143872 }, { "epoch": 0.27, "learning_rate": 0.0007381640186165944, "loss": 0.0755, "theoretical_loss": 3.7194386566112314, "tokens_seen": 888406016 }, { "epoch": 0.27, "learning_rate": 0.0007380837746750121, "loss": 0.0737, "theoretical_loss": 3.7193327512505356, "tokens_seen": 888668160 }, { "epoch": 0.27, "learning_rate": 0.0007380035307334296, "loss": 0.0773, "theoretical_loss": 3.7192268858701576, "tokens_seen": 888930304 }, { "epoch": 0.27, "learning_rate": 0.0007379232867918472, "loss": 0.0742, "theoretical_loss": 3.7191210604432205, "tokens_seen": 889192448 }, { "epoch": 0.27, "learning_rate": 0.0007378430428502649, "loss": 0.0774, "theoretical_loss": 3.7190152749428735, "tokens_seen": 889454592 }, { "epoch": 0.27, "learning_rate": 0.0007377627989086824, "loss": 0.0758, "theoretical_loss": 3.7189095293422927, "tokens_seen": 889716736 }, { "epoch": 0.27, "learning_rate": 0.0007376825549671, "loss": 0.0772, "theoretical_loss": 3.71880382361468, "tokens_seen": 889978880 }, { "epoch": 0.27, "learning_rate": 0.0007376023110255176, "loss": 0.0766, "theoretical_loss": 3.7186981577332614, "tokens_seen": 890241024 }, { "epoch": 0.27, "learning_rate": 0.0007375220670839352, "loss": 0.0735, "theoretical_loss": 3.718592531671291, "tokens_seen": 890503168 }, { "epoch": 0.27, "learning_rate": 0.0007374418231423527, "loss": 0.0759, "theoretical_loss": 3.7184869454020477, "tokens_seen": 890765312 }, { "epoch": 0.27, "learning_rate": 0.0007373615792007704, "loss": 0.0747, "theoretical_loss": 3.7183813988988357, "tokens_seen": 891027456 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0005592244560830295, "objective/train/docs_used": 329220, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.468513011932373, "objective/train/original_loss": 1.468513011932373, "objective/train/theoretical_loss": 3.7182758921349865, "objective/train/tokens_used": 911749600, "objective/train/value_avg": -0.00971221923828125, "objective/train/value_loss": 0.0005271669942885637, "objective/train/value_max": -0.00017535686492919922, "objective/train/value_min": -0.74658203125, "objective/train/value_reward_corr": 0.6262416599375313, "objective/train/value_std": 0.0177154541015625, "objective/train/weight_avg": 1.0007938146591187, "objective/train/weighted_lm_loss": 1.4691156148910522, "objective/train/weights_max": 1.5486183166503906, "objective/train/weights_min": 0.36942073702812195, "theoretical_loss": 3.7182758921349865, "tokens_seen": 891289600 }, { "epoch": 0.27, "learning_rate": 0.0007372813352591879, "loss": 0.078, "theoretical_loss": 3.7182758921349865, "tokens_seen": 891289600 }, { "epoch": 0.27, "learning_rate": 0.0007372010913176055, "loss": 0.076, "theoretical_loss": 3.718170425083856, "tokens_seen": 891551744 }, { "epoch": 0.27, "learning_rate": 0.0007371208473760232, "loss": 0.0755, "theoretical_loss": 3.718064997718826, "tokens_seen": 891813888 }, { "epoch": 0.27, "learning_rate": 0.0007370406034344407, "loss": 0.0754, "theoretical_loss": 3.7179596100133034, "tokens_seen": 892076032 }, { "epoch": 0.27, "learning_rate": 0.0007369603594928584, "loss": 0.078, "theoretical_loss": 3.7178542619407233, "tokens_seen": 892338176 }, { "epoch": 0.27, "learning_rate": 0.0007368801155512759, "loss": 0.0785, "theoretical_loss": 3.7177489534745427, "tokens_seen": 892600320 }, { "epoch": 0.27, "learning_rate": 0.0007367998716096935, "loss": 0.0786, "theoretical_loss": 3.717643684588247, "tokens_seen": 892862464 }, { "epoch": 0.27, "learning_rate": 0.0007367196276681111, "loss": 0.0766, "theoretical_loss": 3.7175384552553457, "tokens_seen": 893124608 }, { "epoch": 0.27, "learning_rate": 0.0007366393837265286, "loss": 0.0739, "theoretical_loss": 3.7174332654493742, "tokens_seen": 893386752 }, { "epoch": 0.27, "learning_rate": 0.0007365591397849462, "loss": 0.0766, "theoretical_loss": 3.717328115143894, "tokens_seen": 893648896 }, { "epoch": 0.27, "learning_rate": 0.0007364788958433638, "loss": 0.0766, "theoretical_loss": 3.717223004312491, "tokens_seen": 893911040 }, { "epoch": 0.27, "learning_rate": 0.0007363986519017815, "loss": 0.0767, "theoretical_loss": 3.717117932928777, "tokens_seen": 894173184 }, { "epoch": 0.27, "learning_rate": 0.000736318407960199, "loss": 0.0752, "theoretical_loss": 3.7170129009663886, "tokens_seen": 894435328 }, { "epoch": 0.27, "learning_rate": 0.0007362381640186167, "loss": 0.0777, "theoretical_loss": 3.716907908398989, "tokens_seen": 894697472 }, { "epoch": 0.27, "learning_rate": 0.0007361579200770342, "loss": 0.0756, "theoretical_loss": 3.7168029552002655, "tokens_seen": 894959616 }, { "epoch": 0.27, "learning_rate": 0.0007360776761354517, "loss": 0.0764, "theoretical_loss": 3.716698041343931, "tokens_seen": 895221760 }, { "epoch": 0.27, "learning_rate": 0.0007359974321938694, "loss": 0.0779, "theoretical_loss": 3.716593166803724, "tokens_seen": 895483904 }, { "epoch": 0.27, "learning_rate": 0.0007359171882522869, "loss": 0.0734, "theoretical_loss": 3.7164883315534087, "tokens_seen": 895746048 }, { "epoch": 0.27, "learning_rate": 0.0007358369443107046, "loss": 0.0773, "theoretical_loss": 3.7163835355667723, "tokens_seen": 896008192 }, { "epoch": 0.27, "learning_rate": 0.0007357567003691221, "loss": 0.074, "theoretical_loss": 3.7162787788176295, "tokens_seen": 896270336 }, { "epoch": 0.27, "learning_rate": 0.0007356764564275397, "loss": 0.0762, "theoretical_loss": 3.716174061279819, "tokens_seen": 896532480 }, { "epoch": 0.27, "learning_rate": 0.0007355962124859574, "loss": 0.0739, "theoretical_loss": 3.7160693829272047, "tokens_seen": 896794624 }, { "epoch": 0.27, "learning_rate": 0.000735515968544375, "loss": 0.0746, "theoretical_loss": 3.715964743733676, "tokens_seen": 897056768 }, { "epoch": 0.27, "learning_rate": 0.0007354357246027925, "loss": 0.0778, "theoretical_loss": 3.7158601436731464, "tokens_seen": 897318912 }, { "epoch": 0.27, "learning_rate": 0.0007353554806612101, "loss": 0.0749, "theoretical_loss": 3.715755582719556, "tokens_seen": 897581056 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0012793752830475569, "objective/train/docs_used": 331577, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5417509078979492, "objective/train/original_loss": 1.5417510271072388, "objective/train/theoretical_loss": 3.7156510608468674, "objective/train/tokens_used": 918303200, "objective/train/value_avg": -0.007205963134765625, "objective/train/value_loss": 0.0003468697250355035, "objective/train/value_max": -8.684396743774414e-05, "objective/train/value_min": -0.890625, "objective/train/value_reward_corr": 0.5998975412340294, "objective/train/value_std": 0.014312744140625, "objective/train/weight_avg": 1.0014266967773438, "objective/train/weighted_lm_loss": 1.5438824892044067, "objective/train/weights_max": 1.7019463777542114, "objective/train/weights_min": 0.2276768684387207, "theoretical_loss": 3.7156510608468674, "tokens_seen": 897843200 }, { "epoch": 0.27, "learning_rate": 0.0007352752367196277, "loss": 0.072, "theoretical_loss": 3.7156510608468674, "tokens_seen": 897843200 }, { "epoch": 0.27, "learning_rate": 0.0007351949927780452, "loss": 0.0769, "theoretical_loss": 3.7155465780290706, "tokens_seen": 898105344 }, { "epoch": 0.27, "learning_rate": 0.0007351147488364629, "loss": 0.0755, "theoretical_loss": 3.7154421342401793, "tokens_seen": 898367488 }, { "epoch": 0.27, "learning_rate": 0.0007350345048948804, "loss": 0.0757, "theoretical_loss": 3.7153377294542325, "tokens_seen": 898629632 }, { "epoch": 0.27, "learning_rate": 0.000734954260953298, "loss": 0.0756, "theoretical_loss": 3.715233363645293, "tokens_seen": 898891776 }, { "epoch": 0.27, "learning_rate": 0.0007348740170117157, "loss": 0.0757, "theoretical_loss": 3.7151290367874497, "tokens_seen": 899153920 }, { "epoch": 0.27, "learning_rate": 0.0007347937730701332, "loss": 0.0744, "theoretical_loss": 3.715024748854815, "tokens_seen": 899416064 }, { "epoch": 0.27, "learning_rate": 0.0007347135291285509, "loss": 0.0753, "theoretical_loss": 3.714920499821528, "tokens_seen": 899678208 }, { "epoch": 0.27, "learning_rate": 0.0007346332851869684, "loss": 0.0746, "theoretical_loss": 3.7148162896617505, "tokens_seen": 899940352 }, { "epoch": 0.27, "learning_rate": 0.000734553041245386, "loss": 0.074, "theoretical_loss": 3.714712118349669, "tokens_seen": 900202496 }, { "epoch": 0.27, "learning_rate": 0.0007344727973038036, "loss": 0.0788, "theoretical_loss": 3.7146079858594976, "tokens_seen": 900464640 }, { "epoch": 0.27, "learning_rate": 0.0007343925533622212, "loss": 0.0753, "theoretical_loss": 3.714503892165471, "tokens_seen": 900726784 }, { "epoch": 0.27, "learning_rate": 0.0007343123094206387, "loss": 0.0758, "theoretical_loss": 3.714399837241851, "tokens_seen": 900988928 }, { "epoch": 0.27, "learning_rate": 0.0007342320654790563, "loss": 0.0748, "theoretical_loss": 3.7142958210629233, "tokens_seen": 901251072 }, { "epoch": 0.27, "learning_rate": 0.000734151821537474, "loss": 0.0769, "theoretical_loss": 3.714191843602998, "tokens_seen": 901513216 }, { "epoch": 0.27, "learning_rate": 0.0007340715775958915, "loss": 0.0769, "theoretical_loss": 3.7140879048364104, "tokens_seen": 901775360 }, { "epoch": 0.27, "learning_rate": 0.0007339913336543092, "loss": 0.0756, "theoretical_loss": 3.7139840047375183, "tokens_seen": 902037504 }, { "epoch": 0.27, "learning_rate": 0.0007339110897127267, "loss": 0.0759, "theoretical_loss": 3.713880143280707, "tokens_seen": 902299648 }, { "epoch": 0.27, "learning_rate": 0.0007338308457711443, "loss": 0.0758, "theoretical_loss": 3.713776320440383, "tokens_seen": 902561792 }, { "epoch": 0.27, "learning_rate": 0.0007337506018295619, "loss": 0.0756, "theoretical_loss": 3.7136725361909795, "tokens_seen": 902823936 }, { "epoch": 0.27, "learning_rate": 0.0007336703578879794, "loss": 0.0763, "theoretical_loss": 3.713568790506953, "tokens_seen": 903086080 }, { "epoch": 0.27, "learning_rate": 0.000733590113946397, "loss": 0.0745, "theoretical_loss": 3.7134650833627854, "tokens_seen": 903348224 }, { "epoch": 0.27, "learning_rate": 0.0007335098700048146, "loss": 0.0735, "theoretical_loss": 3.7133614147329808, "tokens_seen": 903610368 }, { "epoch": 0.27, "learning_rate": 0.0007334296260632322, "loss": 0.0775, "theoretical_loss": 3.7132577845920696, "tokens_seen": 903872512 }, { "epoch": 0.27, "learning_rate": 0.0007333493821216499, "loss": 0.076, "theoretical_loss": 3.7131541929146055, "tokens_seen": 904134656 }, { "epoch": 0.27, "objective/train/advantage_avg": -9.983734344132245e-05, "objective/train/docs_used": 333943, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4496132135391235, "objective/train/original_loss": 1.449613094329834, "objective/train/theoretical_loss": 3.713050639675166, "objective/train/tokens_used": 924856800, "objective/train/value_avg": -0.00740814208984375, "objective/train/value_loss": 0.00036323920357972383, "objective/train/value_max": -0.00014650821685791016, "objective/train/value_min": -0.87744140625, "objective/train/value_reward_corr": 0.7248527212737947, "objective/train/value_std": 0.014312744140625, "objective/train/weight_avg": 1.000061273574829, "objective/train/weighted_lm_loss": 1.4491292238235474, "objective/train/weights_max": 1.8285359144210815, "objective/train/weights_min": 0.3715519905090332, "theoretical_loss": 3.713050639675166, "tokens_seen": 904396800 }, { "epoch": 0.27, "learning_rate": 0.0007332691381800675, "loss": 0.0729, "theoretical_loss": 3.713050639675166, "tokens_seen": 904396800 }, { "epoch": 0.27, "learning_rate": 0.000733188894238485, "loss": 0.0731, "theoretical_loss": 3.712947124848354, "tokens_seen": 904658944 }, { "epoch": 0.27, "learning_rate": 0.0007331086502969026, "loss": 0.0771, "theoretical_loss": 3.7128436484087954, "tokens_seen": 904921088 }, { "epoch": 0.27, "learning_rate": 0.0007330284063553202, "loss": 0.0748, "theoretical_loss": 3.712740210331141, "tokens_seen": 905183232 }, { "epoch": 0.27, "learning_rate": 0.0007329481624137377, "loss": 0.0769, "theoretical_loss": 3.712636810590065, "tokens_seen": 905445376 }, { "epoch": 0.27, "learning_rate": 0.0007328679184721554, "loss": 0.0736, "theoretical_loss": 3.7125334491602664, "tokens_seen": 905707520 }, { "epoch": 0.27, "learning_rate": 0.0007327876745305729, "loss": 0.0743, "theoretical_loss": 3.712430126016467, "tokens_seen": 905969664 }, { "epoch": 0.27, "learning_rate": 0.0007327074305889905, "loss": 0.0751, "theoretical_loss": 3.7123268411334136, "tokens_seen": 906231808 }, { "epoch": 0.27, "learning_rate": 0.0007326271866474082, "loss": 0.0736, "theoretical_loss": 3.7122235944858772, "tokens_seen": 906493952 }, { "epoch": 0.27, "learning_rate": 0.0007325469427058258, "loss": 0.0734, "theoretical_loss": 3.712120386048652, "tokens_seen": 906756096 }, { "epoch": 0.27, "learning_rate": 0.0007324666987642433, "loss": 0.0758, "theoretical_loss": 3.712017215796556, "tokens_seen": 907018240 }, { "epoch": 0.27, "learning_rate": 0.0007323864548226609, "loss": 0.0744, "theoretical_loss": 3.7119140837044315, "tokens_seen": 907280384 }, { "epoch": 0.28, "learning_rate": 0.0007323062108810785, "loss": 0.0735, "theoretical_loss": 3.7118109897471445, "tokens_seen": 907542528 }, { "epoch": 0.28, "learning_rate": 0.000732225966939496, "loss": 0.0732, "theoretical_loss": 3.7117079338995858, "tokens_seen": 907804672 }, { "epoch": 0.28, "learning_rate": 0.0007321457229979137, "loss": 0.0767, "theoretical_loss": 3.7116049161366673, "tokens_seen": 908066816 }, { "epoch": 0.28, "learning_rate": 0.0007320654790563312, "loss": 0.0727, "theoretical_loss": 3.7115019364333275, "tokens_seen": 908328960 }, { "epoch": 0.28, "learning_rate": 0.000731985235114749, "loss": 0.0761, "theoretical_loss": 3.7113989947645276, "tokens_seen": 908591104 }, { "epoch": 0.28, "learning_rate": 0.0007319049911731665, "loss": 0.0754, "theoretical_loss": 3.711296091105252, "tokens_seen": 908853248 }, { "epoch": 0.28, "learning_rate": 0.000731824747231584, "loss": 0.0722, "theoretical_loss": 3.7111932254305096, "tokens_seen": 909115392 }, { "epoch": 0.28, "learning_rate": 0.0007317445032900017, "loss": 0.0747, "theoretical_loss": 3.7110903977153313, "tokens_seen": 909377536 }, { "epoch": 0.28, "learning_rate": 0.0007316642593484192, "loss": 0.0757, "theoretical_loss": 3.710987607934774, "tokens_seen": 909639680 }, { "epoch": 0.28, "learning_rate": 0.0007315840154068368, "loss": 0.0759, "theoretical_loss": 3.7108848560639167, "tokens_seen": 909901824 }, { "epoch": 0.28, "learning_rate": 0.0007315037714652544, "loss": 0.0753, "theoretical_loss": 3.7107821420778615, "tokens_seen": 910163968 }, { "epoch": 0.28, "learning_rate": 0.000731423527523672, "loss": 0.0744, "theoretical_loss": 3.7106794659517357, "tokens_seen": 910426112 }, { "epoch": 0.28, "learning_rate": 0.0007313432835820895, "loss": 0.0729, "theoretical_loss": 3.7105768276606885, "tokens_seen": 910688256 }, { "epoch": 0.28, "objective/train/advantage_avg": -0.001721195294521749, "objective/train/docs_used": 336141, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5599461793899536, "objective/train/original_loss": 1.5599462985992432, "objective/train/theoretical_loss": 3.710474227179893, "objective/train/tokens_used": 931410400, "objective/train/value_avg": -0.006168365478515625, "objective/train/value_loss": 0.00024355393543373793, "objective/train/value_max": -0.0001398324966430664, "objective/train/value_min": -0.4716796875, "objective/train/value_reward_corr": 0.6900471509482724, "objective/train/value_std": 0.0086822509765625, "objective/train/weight_avg": 0.9983928203582764, "objective/train/weighted_lm_loss": 1.5585672855377197, "objective/train/weights_max": 1.1376217603683472, "objective/train/weights_min": 0.3742261826992035, "theoretical_loss": 3.710474227179893, "tokens_seen": 910950400 }, { "epoch": 0.28, "learning_rate": 0.0007312630396405071, "loss": 0.0743, "theoretical_loss": 3.710474227179893, "tokens_seen": 910950400 }, { "epoch": 0.28, "learning_rate": 0.0007311827956989248, "loss": 0.0741, "theoretical_loss": 3.710371664484547, "tokens_seen": 911212544 }, { "epoch": 0.28, "learning_rate": 0.0007311025517573423, "loss": 0.0714, "theoretical_loss": 3.710269139549869, "tokens_seen": 911474688 }, { "epoch": 0.28, "learning_rate": 0.00073102230781576, "loss": 0.0724, "theoretical_loss": 3.7101666523511034, "tokens_seen": 911736832 }, { "epoch": 0.28, "learning_rate": 0.0007309420638741775, "loss": 0.075, "theoretical_loss": 3.710064202863517, "tokens_seen": 911998976 }, { "epoch": 0.28, "learning_rate": 0.0007308618199325952, "loss": 0.077, "theoretical_loss": 3.7099617910623994, "tokens_seen": 912261120 }, { "epoch": 0.28, "learning_rate": 0.0007307815759910127, "loss": 0.0751, "theoretical_loss": 3.7098594169230648, "tokens_seen": 912523264 }, { "epoch": 0.28, "learning_rate": 0.0007307013320494302, "loss": 0.0732, "theoretical_loss": 3.7097570804208497, "tokens_seen": 912785408 }, { "epoch": 0.28, "learning_rate": 0.0007306210881078479, "loss": 0.074, "theoretical_loss": 3.709654781531113, "tokens_seen": 913047552 }, { "epoch": 0.28, "learning_rate": 0.0007305408441662654, "loss": 0.0752, "theoretical_loss": 3.709552520229239, "tokens_seen": 913309696 }, { "epoch": 0.28, "learning_rate": 0.000730460600224683, "loss": 0.074, "theoretical_loss": 3.7094502964906337, "tokens_seen": 913571840 }, { "epoch": 0.28, "learning_rate": 0.0007303803562831007, "loss": 0.0741, "theoretical_loss": 3.709348110290726, "tokens_seen": 913833984 }, { "epoch": 0.28, "learning_rate": 0.0007303001123415183, "loss": 0.0742, "theoretical_loss": 3.7092459616049682, "tokens_seen": 914096128 }, { "epoch": 0.28, "learning_rate": 0.0007302198683999358, "loss": 0.0745, "theoretical_loss": 3.709143850408837, "tokens_seen": 914358272 }, { "epoch": 0.28, "learning_rate": 0.0007301396244583534, "loss": 0.074, "theoretical_loss": 3.7090417766778305, "tokens_seen": 914620416 }, { "epoch": 0.28, "learning_rate": 0.000730059380516771, "loss": 0.0736, "theoretical_loss": 3.7089397403874704, "tokens_seen": 914882560 }, { "epoch": 0.28, "learning_rate": 0.0007299791365751885, "loss": 0.0742, "theoretical_loss": 3.708837741513301, "tokens_seen": 915144704 }, { "epoch": 0.28, "learning_rate": 0.0007298988926336062, "loss": 0.0751, "theoretical_loss": 3.7087357800308904, "tokens_seen": 915406848 }, { "epoch": 0.28, "learning_rate": 0.0007298186486920237, "loss": 0.0734, "theoretical_loss": 3.708633855915829, "tokens_seen": 915668992 }, { "epoch": 0.28, "learning_rate": 0.0007297384047504413, "loss": 0.0739, "theoretical_loss": 3.708531969143731, "tokens_seen": 915931136 }, { "epoch": 0.28, "learning_rate": 0.000729658160808859, "loss": 0.0766, "theoretical_loss": 3.708430119690232, "tokens_seen": 916193280 }, { "epoch": 0.28, "learning_rate": 0.0007295779168672765, "loss": 0.0748, "theoretical_loss": 3.708328307530991, "tokens_seen": 916455424 }, { "epoch": 0.28, "learning_rate": 0.0007294976729256942, "loss": 0.0736, "theoretical_loss": 3.7082265326416914, "tokens_seen": 916717568 }, { "epoch": 0.28, "learning_rate": 0.0007294174289841117, "loss": 0.0732, "theoretical_loss": 3.708124794998037, "tokens_seen": 916979712 }, { "epoch": 0.28, "learning_rate": 0.0007293371850425293, "loss": 0.0722, "theoretical_loss": 3.708023094575756, "tokens_seen": 917241856 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0011513980571180582, "objective/train/docs_used": 338425, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.586716890335083, "objective/train/original_loss": 1.5867170095443726, "objective/train/theoretical_loss": 3.7079214313505986, "objective/train/tokens_used": 937964000, "objective/train/value_avg": -0.0078125, "objective/train/value_loss": 0.00020614459936041385, "objective/train/value_max": -0.00014650821685791016, "objective/train/value_min": -0.955078125, "objective/train/value_reward_corr": 0.6817206994700864, "objective/train/value_std": 0.014862060546875, "objective/train/weight_avg": 1.0012516975402832, "objective/train/weighted_lm_loss": 1.588771939277649, "objective/train/weights_max": 2.5229907035827637, "objective/train/weights_min": 0.39287444949150085, "theoretical_loss": 3.7079214313505986, "tokens_seen": 917504000 }, { "epoch": 0.28, "learning_rate": 0.0007292569411009469, "loss": 0.0756, "theoretical_loss": 3.7079214313505986, "tokens_seen": 917504000 }, { "epoch": 0.28, "learning_rate": 0.0007291766971593645, "loss": 0.073, "theoretical_loss": 3.707819805298338, "tokens_seen": 917766144 }, { "epoch": 0.28, "learning_rate": 0.000729096453217782, "loss": 0.0756, "theoretical_loss": 3.7077182163947704, "tokens_seen": 918028288 }, { "epoch": 0.28, "learning_rate": 0.0007290162092761998, "loss": 0.0748, "theoretical_loss": 3.7076166646157134, "tokens_seen": 918290432 }, { "epoch": 0.28, "learning_rate": 0.0007289359653346173, "loss": 0.075, "theoretical_loss": 3.7075151499370094, "tokens_seen": 918552576 }, { "epoch": 0.28, "learning_rate": 0.0007288557213930348, "loss": 0.0768, "theoretical_loss": 3.7074136723345212, "tokens_seen": 918814720 }, { "epoch": 0.28, "learning_rate": 0.0007287754774514525, "loss": 0.0768, "theoretical_loss": 3.707312231784136, "tokens_seen": 919076864 }, { "epoch": 0.28, "learning_rate": 0.00072869523350987, "loss": 0.0745, "theoretical_loss": 3.7072108282617617, "tokens_seen": 919339008 }, { "epoch": 0.28, "learning_rate": 0.0007286149895682876, "loss": 0.0756, "theoretical_loss": 3.7071094617433307, "tokens_seen": 919601152 }, { "epoch": 0.28, "learning_rate": 0.0007285347456267052, "loss": 0.0748, "theoretical_loss": 3.707008132204796, "tokens_seen": 919863296 }, { "epoch": 0.28, "learning_rate": 0.0007284545016851228, "loss": 0.0734, "theoretical_loss": 3.7069068396221345, "tokens_seen": 920125440 }, { "epoch": 0.28, "learning_rate": 0.0007283742577435403, "loss": 0.0732, "theoretical_loss": 3.706805583971345, "tokens_seen": 920387584 }, { "epoch": 0.28, "learning_rate": 0.0007282940138019579, "loss": 0.0756, "theoretical_loss": 3.7067043652284495, "tokens_seen": 920649728 }, { "epoch": 0.28, "learning_rate": 0.0007282137698603756, "loss": 0.0809, "theoretical_loss": 3.7066031833694906, "tokens_seen": 920911872 }, { "epoch": 0.28, "learning_rate": 0.0007281335259187932, "loss": 0.0763, "theoretical_loss": 3.7065020383705347, "tokens_seen": 921174016 }, { "epoch": 0.28, "learning_rate": 0.0007280532819772108, "loss": 0.0719, "theoretical_loss": 3.70640093020767, "tokens_seen": 921436160 }, { "epoch": 0.28, "learning_rate": 0.0007279730380356283, "loss": 0.0762, "theoretical_loss": 3.7062998588570073, "tokens_seen": 921698304 }, { "epoch": 0.28, "learning_rate": 0.000727892794094046, "loss": 0.074, "theoretical_loss": 3.7061988242946793, "tokens_seen": 921960448 }, { "epoch": 0.28, "learning_rate": 0.0007278125501524635, "loss": 0.0744, "theoretical_loss": 3.7060978264968423, "tokens_seen": 922222592 }, { "epoch": 0.28, "learning_rate": 0.000727732306210881, "loss": 0.0749, "theoretical_loss": 3.705996865439672, "tokens_seen": 922484736 }, { "epoch": 0.28, "learning_rate": 0.0007276520622692987, "loss": 0.0727, "theoretical_loss": 3.7058959410993695, "tokens_seen": 922746880 }, { "epoch": 0.28, "learning_rate": 0.0007275718183277162, "loss": 0.0743, "theoretical_loss": 3.7057950534521558, "tokens_seen": 923009024 }, { "epoch": 0.28, "learning_rate": 0.0007274915743861338, "loss": 0.0763, "theoretical_loss": 3.705694202474275, "tokens_seen": 923271168 }, { "epoch": 0.28, "learning_rate": 0.0007274113304445515, "loss": 0.0742, "theoretical_loss": 3.7055933881419936, "tokens_seen": 923533312 }, { "epoch": 0.28, "learning_rate": 0.0007273310865029691, "loss": 0.0705, "theoretical_loss": 3.7054926104315995, "tokens_seen": 923795456 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0009234403842128813, "objective/train/docs_used": 340768, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5103869438171387, "objective/train/original_loss": 1.5103867053985596, "objective/train/theoretical_loss": 3.705391869319403, "objective/train/tokens_used": 944517600, "objective/train/value_avg": -0.00710296630859375, "objective/train/value_loss": 0.00018672032456379384, "objective/train/value_max": -0.00013136863708496094, "objective/train/value_min": -0.277099609375, "objective/train/value_reward_corr": 0.6575917614537756, "objective/train/value_std": 0.01128387451171875, "objective/train/weight_avg": 1.001011610031128, "objective/train/weighted_lm_loss": 1.5110077857971191, "objective/train/weights_max": 1.3192977905273438, "objective/train/weights_min": 0.3727017939090729, "theoretical_loss": 3.705391869319403, "tokens_seen": 924057600 }, { "epoch": 0.28, "learning_rate": 0.0007272508425613866, "loss": 0.0732, "theoretical_loss": 3.705391869319403, "tokens_seen": 924057600 }, { "epoch": 0.28, "learning_rate": 0.0007271705986198042, "loss": 0.0717, "theoretical_loss": 3.7052911647817357, "tokens_seen": 924319744 }, { "epoch": 0.28, "learning_rate": 0.0007270903546782218, "loss": 0.0752, "theoretical_loss": 3.7051904967949527, "tokens_seen": 924581888 }, { "epoch": 0.28, "learning_rate": 0.0007270101107366394, "loss": 0.0711, "theoretical_loss": 3.7050898653354296, "tokens_seen": 924844032 }, { "epoch": 0.28, "learning_rate": 0.000726929866795057, "loss": 0.0745, "theoretical_loss": 3.7049892703795653, "tokens_seen": 925106176 }, { "epoch": 0.28, "learning_rate": 0.0007268496228534745, "loss": 0.0739, "theoretical_loss": 3.70488871190378, "tokens_seen": 925368320 }, { "epoch": 0.28, "learning_rate": 0.0007267693789118923, "loss": 0.0737, "theoretical_loss": 3.704788189884515, "tokens_seen": 925630464 }, { "epoch": 0.28, "learning_rate": 0.0007266891349703098, "loss": 0.0753, "theoretical_loss": 3.7046877042982347, "tokens_seen": 925892608 }, { "epoch": 0.28, "learning_rate": 0.0007266088910287273, "loss": 0.0757, "theoretical_loss": 3.7045872551214254, "tokens_seen": 926154752 }, { "epoch": 0.28, "learning_rate": 0.000726528647087145, "loss": 0.0719, "theoretical_loss": 3.704486842330594, "tokens_seen": 926416896 }, { "epoch": 0.28, "learning_rate": 0.0007264484031455625, "loss": 0.074, "theoretical_loss": 3.7043864659022696, "tokens_seen": 926679040 }, { "epoch": 0.28, "learning_rate": 0.0007263681592039801, "loss": 0.0714, "theoretical_loss": 3.704286125813004, "tokens_seen": 926941184 }, { "epoch": 0.28, "learning_rate": 0.0007262879152623977, "loss": 0.074, "theoretical_loss": 3.7041858220393706, "tokens_seen": 927203328 }, { "epoch": 0.28, "learning_rate": 0.0007262076713208153, "loss": 0.0731, "theoretical_loss": 3.704085554557964, "tokens_seen": 927465472 }, { "epoch": 0.28, "learning_rate": 0.0007261274273792328, "loss": 0.0728, "theoretical_loss": 3.703985323345399, "tokens_seen": 927727616 }, { "epoch": 0.28, "learning_rate": 0.0007260471834376505, "loss": 0.0705, "theoretical_loss": 3.7038851283783156, "tokens_seen": 927989760 }, { "epoch": 0.28, "learning_rate": 0.000725966939496068, "loss": 0.075, "theoretical_loss": 3.7037849696333724, "tokens_seen": 928251904 }, { "epoch": 0.28, "learning_rate": 0.0007258866955544856, "loss": 0.0765, "theoretical_loss": 3.703684847087251, "tokens_seen": 928514048 }, { "epoch": 0.28, "learning_rate": 0.0007258064516129033, "loss": 0.0759, "theoretical_loss": 3.7035847607166534, "tokens_seen": 928776192 }, { "epoch": 0.28, "learning_rate": 0.0007257262076713208, "loss": 0.0734, "theoretical_loss": 3.703484710498306, "tokens_seen": 929038336 }, { "epoch": 0.28, "learning_rate": 0.0007256459637297385, "loss": 0.0801, "theoretical_loss": 3.703384696408953, "tokens_seen": 929300480 }, { "epoch": 0.28, "learning_rate": 0.000725565719788156, "loss": 0.0735, "theoretical_loss": 3.7032847184253628, "tokens_seen": 929562624 }, { "epoch": 0.28, "learning_rate": 0.0007254854758465736, "loss": 0.0732, "theoretical_loss": 3.7031847765243233, "tokens_seen": 929824768 }, { "epoch": 0.28, "learning_rate": 0.0007254052319049912, "loss": 0.0713, "theoretical_loss": 3.7030848706826465, "tokens_seen": 930086912 }, { "epoch": 0.28, "learning_rate": 0.0007253249879634087, "loss": 0.0752, "theoretical_loss": 3.702985000877163, "tokens_seen": 930349056 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0017973899375647306, "objective/train/docs_used": 343003, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5838013887405396, "objective/train/original_loss": 1.5838017463684082, "objective/train/theoretical_loss": 3.7028851670847267, "objective/train/tokens_used": 951071200, "objective/train/value_avg": -0.009552001953125, "objective/train/value_loss": 0.00028080277843400836, "objective/train/value_max": -0.00020182132720947266, "objective/train/value_min": -0.52685546875, "objective/train/value_reward_corr": 0.687662785571624, "objective/train/value_std": 0.0157623291015625, "objective/train/weight_avg": 1.001927137374878, "objective/train/weighted_lm_loss": 1.586591362953186, "objective/train/weights_max": 1.4974627494812012, "objective/train/weights_min": 0.3688567578792572, "theoretical_loss": 3.7028851670847267, "tokens_seen": 930611200 }, { "epoch": 0.28, "learning_rate": 0.0007252447440218264, "loss": 0.0745, "theoretical_loss": 3.7028851670847267, "tokens_seen": 930611200 }, { "epoch": 0.28, "learning_rate": 0.000725164500080244, "loss": 0.0729, "theoretical_loss": 3.7027853692822124, "tokens_seen": 930873344 }, { "epoch": 0.28, "learning_rate": 0.0007250842561386616, "loss": 0.076, "theoretical_loss": 3.702685607446516, "tokens_seen": 931135488 }, { "epoch": 0.28, "learning_rate": 0.0007250040121970791, "loss": 0.076, "theoretical_loss": 3.7025858815545543, "tokens_seen": 931397632 }, { "epoch": 0.28, "learning_rate": 0.0007249237682554968, "loss": 0.0746, "theoretical_loss": 3.7024861915832665, "tokens_seen": 931659776 }, { "epoch": 0.28, "learning_rate": 0.0007248435243139143, "loss": 0.0739, "theoretical_loss": 3.7023865375096126, "tokens_seen": 931921920 }, { "epoch": 0.28, "learning_rate": 0.0007247632803723318, "loss": 0.0757, "theoretical_loss": 3.7022869193105734, "tokens_seen": 932184064 }, { "epoch": 0.28, "learning_rate": 0.0007246830364307495, "loss": 0.0728, "theoretical_loss": 3.702187336963151, "tokens_seen": 932446208 }, { "epoch": 0.28, "learning_rate": 0.000724602792489167, "loss": 0.0741, "theoretical_loss": 3.70208779044437, "tokens_seen": 932708352 }, { "epoch": 0.28, "learning_rate": 0.0007245225485475848, "loss": 0.0719, "theoretical_loss": 3.7019882797312746, "tokens_seen": 932970496 }, { "epoch": 0.28, "learning_rate": 0.0007244423046060023, "loss": 0.0794, "theoretical_loss": 3.701888804800931, "tokens_seen": 933232640 }, { "epoch": 0.28, "learning_rate": 0.0007243620606644199, "loss": 0.071, "theoretical_loss": 3.701789365630426, "tokens_seen": 933494784 }, { "epoch": 0.28, "learning_rate": 0.0007242818167228375, "loss": 0.0728, "theoretical_loss": 3.701689962196868, "tokens_seen": 933756928 }, { "epoch": 0.28, "learning_rate": 0.000724201572781255, "loss": 0.0769, "theoretical_loss": 3.701590594477387, "tokens_seen": 934019072 }, { "epoch": 0.28, "learning_rate": 0.0007241213288396726, "loss": 0.0766, "theoretical_loss": 3.701491262449131, "tokens_seen": 934281216 }, { "epoch": 0.28, "learning_rate": 0.0007240410848980902, "loss": 0.0764, "theoretical_loss": 3.7013919660892736, "tokens_seen": 934543360 }, { "epoch": 0.28, "learning_rate": 0.0007239608409565078, "loss": 0.0743, "theoretical_loss": 3.701292705375006, "tokens_seen": 934805504 }, { "epoch": 0.28, "learning_rate": 0.0007238805970149253, "loss": 0.0755, "theoretical_loss": 3.701193480283542, "tokens_seen": 935067648 }, { "epoch": 0.28, "learning_rate": 0.000723800353073343, "loss": 0.0718, "theoretical_loss": 3.701094290792116, "tokens_seen": 935329792 }, { "epoch": 0.28, "learning_rate": 0.0007237201091317606, "loss": 0.0755, "theoretical_loss": 3.7009951368779825, "tokens_seen": 935591936 }, { "epoch": 0.28, "learning_rate": 0.0007236398651901781, "loss": 0.0729, "theoretical_loss": 3.700896018518418, "tokens_seen": 935854080 }, { "epoch": 0.28, "learning_rate": 0.0007235596212485958, "loss": 0.0756, "theoretical_loss": 3.700796935690719, "tokens_seen": 936116224 }, { "epoch": 0.28, "learning_rate": 0.0007234793773070133, "loss": 0.0736, "theoretical_loss": 3.700697888372204, "tokens_seen": 936378368 }, { "epoch": 0.28, "learning_rate": 0.0007233991333654309, "loss": 0.0746, "theoretical_loss": 3.7005988765402114, "tokens_seen": 936640512 }, { "epoch": 0.28, "learning_rate": 0.0007233188894238485, "loss": 0.0735, "theoretical_loss": 3.700499900172101, "tokens_seen": 936902656 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0006104443455114961, "objective/train/docs_used": 344763, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4431649446487427, "objective/train/original_loss": 1.4431649446487427, "objective/train/theoretical_loss": 3.700400959245252, "objective/train/tokens_used": 957624800, "objective/train/value_avg": -0.01019287109375, "objective/train/value_loss": 0.00036883429856970906, "objective/train/value_max": -0.0001252889633178711, "objective/train/value_min": -0.8720703125, "objective/train/value_reward_corr": 0.8053598602672474, "objective/train/value_std": 0.0275726318359375, "objective/train/weight_avg": 1.000779628753662, "objective/train/weighted_lm_loss": 1.4440263509750366, "objective/train/weights_max": 1.7109980583190918, "objective/train/weights_min": 0.3686527907848358, "theoretical_loss": 3.700400959245252, "tokens_seen": 937164800 }, { "epoch": 0.28, "learning_rate": 0.0007232386454822661, "loss": 0.0714, "theoretical_loss": 3.700400959245252, "tokens_seen": 937164800 }, { "epoch": 0.28, "learning_rate": 0.0007231584015406837, "loss": 0.0727, "theoretical_loss": 3.7003020537370657, "tokens_seen": 937426944 }, { "epoch": 0.28, "learning_rate": 0.0007230781575991013, "loss": 0.0735, "theoretical_loss": 3.7002031836249643, "tokens_seen": 937689088 }, { "epoch": 0.28, "learning_rate": 0.0007229979136575189, "loss": 0.0755, "theoretical_loss": 3.7001043488863896, "tokens_seen": 937951232 }, { "epoch": 0.28, "learning_rate": 0.0007229176697159365, "loss": 0.0751, "theoretical_loss": 3.7000055494988047, "tokens_seen": 938213376 }, { "epoch": 0.28, "learning_rate": 0.0007228374257743541, "loss": 0.0768, "theoretical_loss": 3.6999067854396936, "tokens_seen": 938475520 }, { "epoch": 0.28, "learning_rate": 0.0007227571818327716, "loss": 0.0713, "theoretical_loss": 3.6998080566865608, "tokens_seen": 938737664 }, { "epoch": 0.28, "learning_rate": 0.0007226769378911893, "loss": 0.0738, "theoretical_loss": 3.6997093632169307, "tokens_seen": 938999808 }, { "epoch": 0.28, "learning_rate": 0.0007225966939496068, "loss": 0.0765, "theoretical_loss": 3.699610705008349, "tokens_seen": 939261952 }, { "epoch": 0.28, "learning_rate": 0.0007225164500080244, "loss": 0.0705, "theoretical_loss": 3.6995120820383818, "tokens_seen": 939524096 }, { "epoch": 0.28, "learning_rate": 0.000722436206066442, "loss": 0.0741, "theoretical_loss": 3.6994134942846157, "tokens_seen": 939786240 }, { "epoch": 0.28, "learning_rate": 0.0007223559621248595, "loss": 0.0758, "theoretical_loss": 3.6993149417246576, "tokens_seen": 940048384 }, { "epoch": 0.28, "learning_rate": 0.0007222757181832772, "loss": 0.0735, "theoretical_loss": 3.699216424336135, "tokens_seen": 940310528 }, { "epoch": 0.29, "learning_rate": 0.0007221954742416948, "loss": 0.0745, "theoretical_loss": 3.6991179420966964, "tokens_seen": 940572672 }, { "epoch": 0.29, "learning_rate": 0.0007221152303001124, "loss": 0.0728, "theoretical_loss": 3.69901949498401, "tokens_seen": 940834816 }, { "epoch": 0.29, "learning_rate": 0.0007220349863585299, "loss": 0.0759, "theoretical_loss": 3.6989210829757644, "tokens_seen": 941096960 }, { "epoch": 0.29, "learning_rate": 0.0007219547424169476, "loss": 0.0756, "theoretical_loss": 3.6988227060496692, "tokens_seen": 941359104 }, { "epoch": 0.29, "learning_rate": 0.0007218744984753651, "loss": 0.0761, "theoretical_loss": 3.6987243641834535, "tokens_seen": 941621248 }, { "epoch": 0.29, "learning_rate": 0.0007217942545337827, "loss": 0.0743, "theoretical_loss": 3.6986260573548675, "tokens_seen": 941883392 }, { "epoch": 0.29, "learning_rate": 0.0007217140105922003, "loss": 0.0746, "theoretical_loss": 3.698527785541682, "tokens_seen": 942145536 }, { "epoch": 0.29, "learning_rate": 0.0007216337666506178, "loss": 0.0741, "theoretical_loss": 3.6984295487216867, "tokens_seen": 942407680 }, { "epoch": 0.29, "learning_rate": 0.0007215535227090356, "loss": 0.0745, "theoretical_loss": 3.6983313468726924, "tokens_seen": 942669824 }, { "epoch": 0.29, "learning_rate": 0.0007214732787674531, "loss": 0.0772, "theoretical_loss": 3.6982331799725303, "tokens_seen": 942931968 }, { "epoch": 0.29, "learning_rate": 0.0007213930348258707, "loss": 0.0733, "theoretical_loss": 3.6981350479990525, "tokens_seen": 943194112 }, { "epoch": 0.29, "learning_rate": 0.0007213127908842883, "loss": 0.078, "theoretical_loss": 3.6980369509301285, "tokens_seen": 943456256 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.0005385353579185903, "objective/train/docs_used": 346959, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3849838972091675, "objective/train/original_loss": 1.384983777999878, "objective/train/theoretical_loss": 3.6979388887436517, "objective/train/tokens_used": 964178400, "objective/train/value_avg": -0.008209228515625, "objective/train/value_loss": 0.00034320203121751547, "objective/train/value_max": -5.8770179748535156e-05, "objective/train/value_min": -0.97412109375, "objective/train/value_reward_corr": 0.7214583684226242, "objective/train/value_std": 0.0195465087890625, "objective/train/weight_avg": 1.0007014274597168, "objective/train/weighted_lm_loss": 1.3859144449234009, "objective/train/weights_max": 2.094869613647461, "objective/train/weights_min": 0.3750951290130615, "theoretical_loss": 3.6979388887436517, "tokens_seen": 943718400 }, { "epoch": 0.29, "learning_rate": 0.0007212325469427058, "loss": 0.0757, "theoretical_loss": 3.6979388887436517, "tokens_seen": 943718400 }, { "epoch": 0.29, "learning_rate": 0.0007211523030011234, "loss": 0.0757, "theoretical_loss": 3.697840861417533, "tokens_seen": 943980544 }, { "epoch": 0.29, "learning_rate": 0.000721072059059541, "loss": 0.0754, "theoretical_loss": 3.697742868929704, "tokens_seen": 944242688 }, { "epoch": 0.29, "learning_rate": 0.0007209918151179586, "loss": 0.0744, "theoretical_loss": 3.6976449112581173, "tokens_seen": 944504832 }, { "epoch": 0.29, "learning_rate": 0.0007209115711763761, "loss": 0.0744, "theoretical_loss": 3.697546988380744, "tokens_seen": 944766976 }, { "epoch": 0.29, "learning_rate": 0.0007208313272347939, "loss": 0.076, "theoretical_loss": 3.697449100275577, "tokens_seen": 945029120 }, { "epoch": 0.29, "learning_rate": 0.0007207510832932114, "loss": 0.0726, "theoretical_loss": 3.6973512469206278, "tokens_seen": 945291264 }, { "epoch": 0.29, "learning_rate": 0.000720670839351629, "loss": 0.0732, "theoretical_loss": 3.6972534282939282, "tokens_seen": 945553408 }, { "epoch": 0.29, "learning_rate": 0.0007205905954100466, "loss": 0.0726, "theoretical_loss": 3.6971556443735314, "tokens_seen": 945815552 }, { "epoch": 0.29, "learning_rate": 0.0007205103514684641, "loss": 0.0709, "theoretical_loss": 3.697057895137508, "tokens_seen": 946077696 }, { "epoch": 0.29, "learning_rate": 0.0007204301075268818, "loss": 0.077, "theoretical_loss": 3.696960180563951, "tokens_seen": 946339840 }, { "epoch": 0.29, "learning_rate": 0.0007203498635852993, "loss": 0.0751, "theoretical_loss": 3.6968625006309717, "tokens_seen": 946601984 }, { "epoch": 0.29, "learning_rate": 0.0007202696196437169, "loss": 0.0745, "theoretical_loss": 3.6967648553167014, "tokens_seen": 946864128 }, { "epoch": 0.29, "learning_rate": 0.0007201893757021345, "loss": 0.0737, "theoretical_loss": 3.696667244599292, "tokens_seen": 947126272 }, { "epoch": 0.29, "learning_rate": 0.000720109131760552, "loss": 0.0758, "theoretical_loss": 3.6965696684569154, "tokens_seen": 947388416 }, { "epoch": 0.29, "learning_rate": 0.0007200288878189697, "loss": 0.0759, "theoretical_loss": 3.6964721268677616, "tokens_seen": 947650560 }, { "epoch": 0.29, "learning_rate": 0.0007199486438773873, "loss": 0.0743, "theoretical_loss": 3.696374619810043, "tokens_seen": 947912704 }, { "epoch": 0.29, "learning_rate": 0.0007198683999358049, "loss": 0.0713, "theoretical_loss": 3.6962771472619886, "tokens_seen": 948174848 }, { "epoch": 0.29, "learning_rate": 0.0007197881559942224, "loss": 0.0743, "theoretical_loss": 3.69617970920185, "tokens_seen": 948436992 }, { "epoch": 0.29, "learning_rate": 0.0007197079120526401, "loss": 0.0767, "theoretical_loss": 3.6960823056078973, "tokens_seen": 948699136 }, { "epoch": 0.29, "learning_rate": 0.0007196276681110576, "loss": 0.0766, "theoretical_loss": 3.6959849364584203, "tokens_seen": 948961280 }, { "epoch": 0.29, "learning_rate": 0.0007195474241694752, "loss": 0.0752, "theoretical_loss": 3.695887601731728, "tokens_seen": 949223424 }, { "epoch": 0.29, "learning_rate": 0.0007194671802278928, "loss": 0.0764, "theoretical_loss": 3.69579030140615, "tokens_seen": 949485568 }, { "epoch": 0.29, "learning_rate": 0.0007193869362863103, "loss": 0.075, "theoretical_loss": 3.6956930354600352, "tokens_seen": 949747712 }, { "epoch": 0.29, "learning_rate": 0.0007193066923447281, "loss": 0.0748, "theoretical_loss": 3.6955958038717522, "tokens_seen": 950009856 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.000604399829171598, "objective/train/docs_used": 349336, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.432031512260437, "objective/train/original_loss": 1.432031512260437, "objective/train/theoretical_loss": 3.695498606619688, "objective/train/tokens_used": 970732000, "objective/train/value_avg": -0.007843017578125, "objective/train/value_loss": 0.00045492048957385123, "objective/train/value_max": -0.00015842914581298828, "objective/train/value_min": -0.95703125, "objective/train/value_reward_corr": 0.5685032250536571, "objective/train/value_std": 0.01549530029296875, "objective/train/weight_avg": 1.0007842779159546, "objective/train/weighted_lm_loss": 1.4333014488220215, "objective/train/weights_max": 2.597604990005493, "objective/train/weights_min": 0.05960186570882797, "theoretical_loss": 3.695498606619688, "tokens_seen": 950272000 }, { "epoch": 0.29, "learning_rate": 0.0007192264484031456, "loss": 0.0746, "theoretical_loss": 3.695498606619688, "tokens_seen": 950272000 }, { "epoch": 0.29, "learning_rate": 0.0007191462044615632, "loss": 0.0727, "theoretical_loss": 3.6954014436822513, "tokens_seen": 950534144 }, { "epoch": 0.29, "learning_rate": 0.0007190659605199808, "loss": 0.0749, "theoretical_loss": 3.695304315037868, "tokens_seen": 950796288 }, { "epoch": 0.29, "learning_rate": 0.0007189857165783984, "loss": 0.0744, "theoretical_loss": 3.6952072206649857, "tokens_seen": 951058432 }, { "epoch": 0.29, "learning_rate": 0.0007189054726368159, "loss": 0.075, "theoretical_loss": 3.695110160542069, "tokens_seen": 951320576 }, { "epoch": 0.29, "learning_rate": 0.0007188252286952335, "loss": 0.0741, "theoretical_loss": 3.6950131346476054, "tokens_seen": 951582720 }, { "epoch": 0.29, "learning_rate": 0.0007187449847536511, "loss": 0.0753, "theoretical_loss": 3.694916142960098, "tokens_seen": 951844864 }, { "epoch": 0.29, "learning_rate": 0.0007186647408120686, "loss": 0.0723, "theoretical_loss": 3.6948191854580728, "tokens_seen": 952107008 }, { "epoch": 0.29, "learning_rate": 0.0007185844968704864, "loss": 0.0736, "theoretical_loss": 3.694722262120072, "tokens_seen": 952369152 }, { "epoch": 0.29, "learning_rate": 0.0007185042529289039, "loss": 0.0751, "theoretical_loss": 3.6946253729246594, "tokens_seen": 952631296 }, { "epoch": 0.29, "learning_rate": 0.0007184240089873215, "loss": 0.0732, "theoretical_loss": 3.6945285178504172, "tokens_seen": 952893440 }, { "epoch": 0.29, "learning_rate": 0.0007183437650457391, "loss": 0.0727, "theoretical_loss": 3.694431696875948, "tokens_seen": 953155584 }, { "epoch": 0.29, "learning_rate": 0.0007182635211041566, "loss": 0.0777, "theoretical_loss": 3.6943349099798715, "tokens_seen": 953417728 }, { "epoch": 0.29, "learning_rate": 0.0007181832771625743, "loss": 0.0706, "theoretical_loss": 3.6942381571408287, "tokens_seen": 953679872 }, { "epoch": 0.29, "learning_rate": 0.0007181030332209918, "loss": 0.0759, "theoretical_loss": 3.6941414383374793, "tokens_seen": 953942016 }, { "epoch": 0.29, "learning_rate": 0.0007180227892794094, "loss": 0.0725, "theoretical_loss": 3.6940447535485026, "tokens_seen": 954204160 }, { "epoch": 0.29, "learning_rate": 0.000717942545337827, "loss": 0.0734, "theoretical_loss": 3.6939481027525956, "tokens_seen": 954466304 }, { "epoch": 0.29, "learning_rate": 0.0007178623013962447, "loss": 0.0737, "theoretical_loss": 3.6938514859284766, "tokens_seen": 954728448 }, { "epoch": 0.29, "learning_rate": 0.0007177820574546622, "loss": 0.0737, "theoretical_loss": 3.6937549030548813, "tokens_seen": 954990592 }, { "epoch": 0.29, "learning_rate": 0.0007177018135130798, "loss": 0.0748, "theoretical_loss": 3.693658354110565, "tokens_seen": 955252736 }, { "epoch": 0.29, "learning_rate": 0.0007176215695714974, "loss": 0.0738, "theoretical_loss": 3.6935618390743032, "tokens_seen": 955514880 }, { "epoch": 0.29, "learning_rate": 0.0007175413256299149, "loss": 0.0742, "theoretical_loss": 3.6934653579248886, "tokens_seen": 955777024 }, { "epoch": 0.29, "learning_rate": 0.0007174610816883326, "loss": 0.0727, "theoretical_loss": 3.693368910641135, "tokens_seen": 956039168 }, { "epoch": 0.29, "learning_rate": 0.0007173808377467501, "loss": 0.076, "theoretical_loss": 3.693272497201874, "tokens_seen": 956301312 }, { "epoch": 0.29, "learning_rate": 0.0007173005938051677, "loss": 0.0776, "theoretical_loss": 3.6931761175859554, "tokens_seen": 956563456 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.0019286867463961244, "objective/train/docs_used": 351696, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4690124988555908, "objective/train/original_loss": 1.4690124988555908, "objective/train/theoretical_loss": 3.693079771772251, "objective/train/tokens_used": 977285600, "objective/train/value_avg": -0.008026123046875, "objective/train/value_loss": 0.0001780358434189111, "objective/train/value_max": -0.00020503997802734375, "objective/train/value_min": -0.70703125, "objective/train/value_reward_corr": 0.6057208029602728, "objective/train/value_std": 0.01194000244140625, "objective/train/weight_avg": 1.0020111799240112, "objective/train/weighted_lm_loss": 1.4720525741577148, "objective/train/weights_max": 1.3528940677642822, "objective/train/weights_min": 0.36884480714797974, "theoretical_loss": 3.693079771772251, "tokens_seen": 956825600 }, { "epoch": 0.29, "learning_rate": 0.0007172203498635853, "loss": 0.0746, "theoretical_loss": 3.693079771772251, "tokens_seen": 956825600 }, { "epoch": 0.29, "learning_rate": 0.0007171401059220028, "loss": 0.0763, "theoretical_loss": 3.692983459739649, "tokens_seen": 957087744 }, { "epoch": 0.29, "learning_rate": 0.0007170598619804205, "loss": 0.0776, "theoretical_loss": 3.6928871814670563, "tokens_seen": 957349888 }, { "epoch": 0.29, "learning_rate": 0.0007169796180388381, "loss": 0.0762, "theoretical_loss": 3.6927909369334007, "tokens_seen": 957612032 }, { "epoch": 0.29, "learning_rate": 0.0007168993740972557, "loss": 0.0724, "theoretical_loss": 3.6926947261176277, "tokens_seen": 957874176 }, { "epoch": 0.29, "learning_rate": 0.0007168191301556733, "loss": 0.076, "theoretical_loss": 3.692598548998702, "tokens_seen": 958136320 }, { "epoch": 0.29, "learning_rate": 0.0007167388862140909, "loss": 0.077, "theoretical_loss": 3.692502405555606, "tokens_seen": 958398464 }, { "epoch": 0.29, "learning_rate": 0.0007166586422725084, "loss": 0.0769, "theoretical_loss": 3.692406295767344, "tokens_seen": 958660608 }, { "epoch": 0.29, "learning_rate": 0.0007165783983309261, "loss": 0.0752, "theoretical_loss": 3.692310219612936, "tokens_seen": 958922752 }, { "epoch": 0.29, "learning_rate": 0.0007164981543893436, "loss": 0.0741, "theoretical_loss": 3.6922141770714214, "tokens_seen": 959184896 }, { "epoch": 0.29, "learning_rate": 0.0007164179104477611, "loss": 0.0736, "theoretical_loss": 3.6921181681218602, "tokens_seen": 959447040 }, { "epoch": 0.29, "learning_rate": 0.0007163376665061789, "loss": 0.0753, "theoretical_loss": 3.6920221927433294, "tokens_seen": 959709184 }, { "epoch": 0.29, "learning_rate": 0.0007162574225645964, "loss": 0.0771, "theoretical_loss": 3.691926250914925, "tokens_seen": 959971328 }, { "epoch": 0.29, "learning_rate": 0.000716177178623014, "loss": 0.0751, "theoretical_loss": 3.691830342615763, "tokens_seen": 960233472 }, { "epoch": 0.29, "learning_rate": 0.0007160969346814316, "loss": 0.0743, "theoretical_loss": 3.6917344678249755, "tokens_seen": 960495616 }, { "epoch": 0.29, "learning_rate": 0.0007160166907398492, "loss": 0.0733, "theoretical_loss": 3.6916386265217156, "tokens_seen": 960757760 }, { "epoch": 0.29, "learning_rate": 0.0007159364467982667, "loss": 0.0727, "theoretical_loss": 3.6915428186851553, "tokens_seen": 961019904 }, { "epoch": 0.29, "learning_rate": 0.0007158562028566843, "loss": 0.0751, "theoretical_loss": 3.6914470442944824, "tokens_seen": 961282048 }, { "epoch": 0.29, "learning_rate": 0.0007157759589151019, "loss": 0.0722, "theoretical_loss": 3.691351303328907, "tokens_seen": 961544192 }, { "epoch": 0.29, "learning_rate": 0.0007156957149735194, "loss": 0.0753, "theoretical_loss": 3.691255595767654, "tokens_seen": 961806336 }, { "epoch": 0.29, "learning_rate": 0.0007156154710319372, "loss": 0.0765, "theoretical_loss": 3.6911599215899704, "tokens_seen": 962068480 }, { "epoch": 0.29, "learning_rate": 0.0007155352270903547, "loss": 0.0757, "theoretical_loss": 3.6910642807751195, "tokens_seen": 962330624 }, { "epoch": 0.29, "learning_rate": 0.0007154549831487724, "loss": 0.075, "theoretical_loss": 3.6909686733023843, "tokens_seen": 962592768 }, { "epoch": 0.29, "learning_rate": 0.0007153747392071899, "loss": 0.0721, "theoretical_loss": 3.690873099151065, "tokens_seen": 962854912 }, { "epoch": 0.29, "learning_rate": 0.0007152944952656074, "loss": 0.0729, "theoretical_loss": 3.690777558300482, "tokens_seen": 963117056 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.0009449372300878167, "objective/train/docs_used": 353969, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4017493724822998, "objective/train/original_loss": 1.4017493724822998, "objective/train/theoretical_loss": 3.690682050729972, "objective/train/tokens_used": 983839200, "objective/train/value_avg": -0.00891876220703125, "objective/train/value_loss": 0.00023926046560518444, "objective/train/value_max": -0.0001926422119140625, "objective/train/value_min": -0.299072265625, "objective/train/value_reward_corr": 0.619741688664302, "objective/train/value_std": 0.0124359130859375, "objective/train/weight_avg": 1.0010510683059692, "objective/train/weighted_lm_loss": 1.403052568435669, "objective/train/weights_max": 1.2246274948120117, "objective/train/weights_min": 0.36947569251060486, "theoretical_loss": 3.690682050729972, "tokens_seen": 963379200 }, { "epoch": 0.29, "learning_rate": 0.0007152142513240251, "loss": 0.0734, "theoretical_loss": 3.690682050729972, "tokens_seen": 963379200 }, { "epoch": 0.29, "learning_rate": 0.0007151340073824426, "loss": 0.0755, "theoretical_loss": 3.6905865764188923, "tokens_seen": 963641344 }, { "epoch": 0.29, "learning_rate": 0.0007150537634408602, "loss": 0.0721, "theoretical_loss": 3.6904911353466177, "tokens_seen": 963903488 }, { "epoch": 0.29, "learning_rate": 0.0007149735194992778, "loss": 0.0754, "theoretical_loss": 3.690395727492541, "tokens_seen": 964165632 }, { "epoch": 0.29, "learning_rate": 0.0007148932755576955, "loss": 0.0732, "theoretical_loss": 3.690300352836074, "tokens_seen": 964427776 }, { "epoch": 0.29, "learning_rate": 0.000714813031616113, "loss": 0.0769, "theoretical_loss": 3.690205011356646, "tokens_seen": 964689920 }, { "epoch": 0.29, "learning_rate": 0.0007147327876745306, "loss": 0.0753, "theoretical_loss": 3.6901097030337056, "tokens_seen": 964952064 }, { "epoch": 0.29, "learning_rate": 0.0007146525437329482, "loss": 0.0727, "theoretical_loss": 3.6900144278467204, "tokens_seen": 965214208 }, { "epoch": 0.29, "learning_rate": 0.0007145722997913657, "loss": 0.0755, "theoretical_loss": 3.6899191857751736, "tokens_seen": 965476352 }, { "epoch": 0.29, "learning_rate": 0.0007144920558497834, "loss": 0.0722, "theoretical_loss": 3.6898239767985688, "tokens_seen": 965738496 }, { "epoch": 0.29, "learning_rate": 0.0007144118119082009, "loss": 0.0753, "theoretical_loss": 3.689728800896428, "tokens_seen": 966000640 }, { "epoch": 0.29, "learning_rate": 0.0007143315679666186, "loss": 0.0735, "theoretical_loss": 3.68963365804829, "tokens_seen": 966262784 }, { "epoch": 0.29, "learning_rate": 0.0007142513240250361, "loss": 0.076, "theoretical_loss": 3.689538548233713, "tokens_seen": 966524928 }, { "epoch": 0.29, "learning_rate": 0.0007141710800834536, "loss": 0.0775, "theoretical_loss": 3.6894434714322726, "tokens_seen": 966787072 }, { "epoch": 0.29, "learning_rate": 0.0007140908361418714, "loss": 0.073, "theoretical_loss": 3.689348427623563, "tokens_seen": 967049216 }, { "epoch": 0.29, "learning_rate": 0.0007140105922002889, "loss": 0.0725, "theoretical_loss": 3.689253416787197, "tokens_seen": 967311360 }, { "epoch": 0.29, "learning_rate": 0.0007139303482587065, "loss": 0.0725, "theoretical_loss": 3.6891584389028047, "tokens_seen": 967573504 }, { "epoch": 0.29, "learning_rate": 0.0007138501043171241, "loss": 0.0746, "theoretical_loss": 3.689063493950034, "tokens_seen": 967835648 }, { "epoch": 0.29, "learning_rate": 0.0007137698603755417, "loss": 0.0736, "theoretical_loss": 3.6889685819085525, "tokens_seen": 968097792 }, { "epoch": 0.29, "learning_rate": 0.0007136896164339592, "loss": 0.0744, "theoretical_loss": 3.688873702758044, "tokens_seen": 968359936 }, { "epoch": 0.29, "learning_rate": 0.0007136093724923768, "loss": 0.073, "theoretical_loss": 3.688778856478211, "tokens_seen": 968622080 }, { "epoch": 0.29, "learning_rate": 0.0007135291285507944, "loss": 0.0737, "theoretical_loss": 3.6886840430487746, "tokens_seen": 968884224 }, { "epoch": 0.29, "learning_rate": 0.0007134488846092119, "loss": 0.0726, "theoretical_loss": 3.688589262449474, "tokens_seen": 969146368 }, { "epoch": 0.29, "learning_rate": 0.0007133686406676297, "loss": 0.0753, "theoretical_loss": 3.6884945146600643, "tokens_seen": 969408512 }, { "epoch": 0.29, "learning_rate": 0.0007132883967260472, "loss": 0.0739, "theoretical_loss": 3.6883997996603215, "tokens_seen": 969670656 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.001449979841709137, "objective/train/docs_used": 356486, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4616808891296387, "objective/train/original_loss": 1.4616808891296387, "objective/train/theoretical_loss": 3.688305117430038, "objective/train/tokens_used": 990392800, "objective/train/value_avg": -0.0089111328125, "objective/train/value_loss": 0.00036317447666078806, "objective/train/value_max": -0.00010150671005249023, "objective/train/value_min": -0.70361328125, "objective/train/value_reward_corr": 0.6299372029237911, "objective/train/value_std": 0.017303466796875, "objective/train/weight_avg": 1.0016149282455444, "objective/train/weighted_lm_loss": 1.4640196561813354, "objective/train/weights_max": 1.4167561531066895, "objective/train/weights_min": 0.3715973496437073, "theoretical_loss": 3.688305117430038, "tokens_seen": 969932800 }, { "epoch": 0.29, "learning_rate": 0.0007132081527844648, "loss": 0.0745, "theoretical_loss": 3.688305117430038, "tokens_seen": 969932800 }, { "epoch": 0.29, "learning_rate": 0.0007131279088428824, "loss": 0.0743, "theoretical_loss": 3.688210467949023, "tokens_seen": 970194944 }, { "epoch": 0.29, "learning_rate": 0.0007130476649013, "loss": 0.0741, "theoretical_loss": 3.6881158511971055, "tokens_seen": 970457088 }, { "epoch": 0.29, "learning_rate": 0.0007129674209597176, "loss": 0.0745, "theoretical_loss": 3.6880212671541326, "tokens_seen": 970719232 }, { "epoch": 0.29, "learning_rate": 0.0007128871770181351, "loss": 0.0751, "theoretical_loss": 3.687926715799967, "tokens_seen": 970981376 }, { "epoch": 0.29, "learning_rate": 0.0007128069330765527, "loss": 0.0746, "theoretical_loss": 3.687832197114491, "tokens_seen": 971243520 }, { "epoch": 0.29, "learning_rate": 0.0007127266891349703, "loss": 0.0759, "theoretical_loss": 3.687737711077605, "tokens_seen": 971505664 }, { "epoch": 0.29, "learning_rate": 0.000712646445193388, "loss": 0.0738, "theoretical_loss": 3.687643257669225, "tokens_seen": 971767808 }, { "epoch": 0.29, "learning_rate": 0.0007125662012518055, "loss": 0.0748, "theoretical_loss": 3.6875488368692877, "tokens_seen": 972029952 }, { "epoch": 0.29, "learning_rate": 0.0007124859573102232, "loss": 0.0737, "theoretical_loss": 3.687454448657745, "tokens_seen": 972292096 }, { "epoch": 0.29, "learning_rate": 0.0007124057133686407, "loss": 0.0739, "theoretical_loss": 3.687360093014568, "tokens_seen": 972554240 }, { "epoch": 0.29, "learning_rate": 0.0007123254694270582, "loss": 0.0744, "theoretical_loss": 3.687265769919745, "tokens_seen": 972816384 }, { "epoch": 0.29, "learning_rate": 0.0007122452254854759, "loss": 0.0751, "theoretical_loss": 3.6871714793532826, "tokens_seen": 973078528 }, { "epoch": 0.29, "learning_rate": 0.0007121649815438934, "loss": 0.077, "theoretical_loss": 3.687077221295203, "tokens_seen": 973340672 }, { "epoch": 0.3, "learning_rate": 0.000712084737602311, "loss": 0.0724, "theoretical_loss": 3.6869829957255496, "tokens_seen": 973602816 }, { "epoch": 0.3, "learning_rate": 0.0007120044936607286, "loss": 0.0734, "theoretical_loss": 3.68688880262438, "tokens_seen": 973864960 }, { "epoch": 0.3, "learning_rate": 0.0007119242497191462, "loss": 0.0728, "theoretical_loss": 3.6867946419717716, "tokens_seen": 974127104 }, { "epoch": 0.3, "learning_rate": 0.0007118440057775638, "loss": 0.0726, "theoretical_loss": 3.6867005137478177, "tokens_seen": 974389248 }, { "epoch": 0.3, "learning_rate": 0.0007117637618359814, "loss": 0.0769, "theoretical_loss": 3.686606417932631, "tokens_seen": 974651392 }, { "epoch": 0.3, "learning_rate": 0.000711683517894399, "loss": 0.0729, "theoretical_loss": 3.6865123545063403, "tokens_seen": 974913536 }, { "epoch": 0.3, "learning_rate": 0.0007116032739528166, "loss": 0.0769, "theoretical_loss": 3.686418323449093, "tokens_seen": 975175680 }, { "epoch": 0.3, "learning_rate": 0.0007115230300112342, "loss": 0.0764, "theoretical_loss": 3.6863243247410526, "tokens_seen": 975437824 }, { "epoch": 0.3, "learning_rate": 0.0007114427860696517, "loss": 0.0734, "theoretical_loss": 3.686230358362401, "tokens_seen": 975699968 }, { "epoch": 0.3, "learning_rate": 0.0007113625421280694, "loss": 0.0738, "theoretical_loss": 3.686136424293338, "tokens_seen": 975962112 }, { "epoch": 0.3, "learning_rate": 0.0007112822981864869, "loss": 0.0742, "theoretical_loss": 3.68604252251408, "tokens_seen": 976224256 }, { "epoch": 0.3, "objective/train/advantage_avg": -0.001066173892468214, "objective/train/docs_used": 358882, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4502272605895996, "objective/train/original_loss": 1.4502272605895996, "objective/train/theoretical_loss": 3.6859486530048615, "objective/train/tokens_used": 996946400, "objective/train/value_avg": -0.011871337890625, "objective/train/value_loss": 0.0007070415304042399, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.36865234375, "objective/train/value_reward_corr": 0.9649324601528265, "objective/train/value_std": 0.037567138671875, "objective/train/weight_avg": 0.9992637634277344, "objective/train/weighted_lm_loss": 1.4516780376434326, "objective/train/weights_max": 1.3549121618270874, "objective/train/weights_min": 0.36916786432266235, "theoretical_loss": 3.6859486530048615, "tokens_seen": 976486400 }, { "epoch": 0.3, "learning_rate": 0.0007112020542449044, "loss": 0.0734, "theoretical_loss": 3.6859486530048615, "tokens_seen": 976486400 }, { "epoch": 0.3, "learning_rate": 0.0007111218103033222, "loss": 0.0734, "theoretical_loss": 3.685854815745933, "tokens_seen": 976748544 }, { "epoch": 0.3, "learning_rate": 0.0007110415663617397, "loss": 0.0743, "theoretical_loss": 3.6857610107175645, "tokens_seen": 977010688 }, { "epoch": 0.3, "learning_rate": 0.0007109613224201573, "loss": 0.077, "theoretical_loss": 3.6856672379000415, "tokens_seen": 977272832 }, { "epoch": 0.3, "learning_rate": 0.0007108810784785749, "loss": 0.0763, "theoretical_loss": 3.6855734972736682, "tokens_seen": 977534976 }, { "epoch": 0.3, "learning_rate": 0.0007108008345369925, "loss": 0.0722, "theoretical_loss": 3.685479788818766, "tokens_seen": 977797120 }, { "epoch": 0.3, "learning_rate": 0.00071072059059541, "loss": 0.0754, "theoretical_loss": 3.6853861125156717, "tokens_seen": 978059264 }, { "epoch": 0.3, "learning_rate": 0.0007106403466538276, "loss": 0.0744, "theoretical_loss": 3.6852924683447412, "tokens_seen": 978321408 }, { "epoch": 0.3, "learning_rate": 0.0007105601027122452, "loss": 0.0723, "theoretical_loss": 3.6851988562863482, "tokens_seen": 978583552 }, { "epoch": 0.3, "learning_rate": 0.0007104798587706628, "loss": 0.0747, "theoretical_loss": 3.6851052763208823, "tokens_seen": 978845696 }, { "epoch": 0.3, "learning_rate": 0.0007103996148290805, "loss": 0.0719, "theoretical_loss": 3.6850117284287505, "tokens_seen": 979107840 }, { "epoch": 0.3, "learning_rate": 0.000710319370887498, "loss": 0.0708, "theoretical_loss": 3.6849182125903774, "tokens_seen": 979369984 }, { "epoch": 0.3, "learning_rate": 0.0007102391269459157, "loss": 0.0764, "theoretical_loss": 3.6848247287862046, "tokens_seen": 979632128 }, { "epoch": 0.3, "learning_rate": 0.0007101588830043332, "loss": 0.0758, "theoretical_loss": 3.684731276996691, "tokens_seen": 979894272 }, { "epoch": 0.3, "learning_rate": 0.0007100786390627508, "loss": 0.0747, "theoretical_loss": 3.684637857202312, "tokens_seen": 980156416 }, { "epoch": 0.3, "learning_rate": 0.0007099983951211684, "loss": 0.0739, "theoretical_loss": 3.684544469383562, "tokens_seen": 980418560 }, { "epoch": 0.3, "learning_rate": 0.0007099181511795859, "loss": 0.0724, "theoretical_loss": 3.6844511135209497, "tokens_seen": 980680704 }, { "epoch": 0.3, "learning_rate": 0.0007098379072380035, "loss": 0.0731, "theoretical_loss": 3.684357789595003, "tokens_seen": 980942848 }, { "epoch": 0.3, "learning_rate": 0.0007097576632964211, "loss": 0.0755, "theoretical_loss": 3.684264497586266, "tokens_seen": 981204992 }, { "epoch": 0.3, "learning_rate": 0.0007096774193548388, "loss": 0.0752, "theoretical_loss": 3.684171237475301, "tokens_seen": 981467136 }, { "epoch": 0.3, "learning_rate": 0.0007095971754132563, "loss": 0.0752, "theoretical_loss": 3.6840780092426852, "tokens_seen": 981729280 }, { "epoch": 0.3, "learning_rate": 0.000709516931471674, "loss": 0.0736, "theoretical_loss": 3.6839848128690145, "tokens_seen": 981991424 }, { "epoch": 0.3, "learning_rate": 0.0007094366875300915, "loss": 0.076, "theoretical_loss": 3.683891648334901, "tokens_seen": 982253568 }, { "epoch": 0.3, "learning_rate": 0.000709356443588509, "loss": 0.0751, "theoretical_loss": 3.6837985156209743, "tokens_seen": 982515712 }, { "epoch": 0.3, "learning_rate": 0.0007092761996469267, "loss": 0.0773, "theoretical_loss": 3.683705414707881, "tokens_seen": 982777856 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0013032013084739447, "objective/train/docs_used": 361383, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.466055154800415, "objective/train/original_loss": 1.466055154800415, "objective/train/theoretical_loss": 3.6836123455762837, "objective/train/tokens_used": 1003500000, "objective/train/value_avg": -0.007678985595703125, "objective/train/value_loss": 0.00017884932458400726, "objective/train/value_max": -0.00016868114471435547, "objective/train/value_min": -0.6962890625, "objective/train/value_reward_corr": 0.6831618657773055, "objective/train/value_std": 0.0128631591796875, "objective/train/weight_avg": 1.0013844966888428, "objective/train/weighted_lm_loss": 1.4686609506607056, "objective/train/weights_max": 1.261329174041748, "objective/train/weights_min": 0.37812936305999756, "theoretical_loss": 3.6836123455762837, "tokens_seen": 983040000 }, { "epoch": 0.3, "learning_rate": 0.0007091959557053442, "loss": 0.0733, "theoretical_loss": 3.6836123455762837, "tokens_seen": 983040000 }, { "epoch": 0.3, "learning_rate": 0.0007091157117637619, "loss": 0.0749, "theoretical_loss": 3.683519308206863, "tokens_seen": 983302144 }, { "epoch": 0.3, "learning_rate": 0.0007090354678221794, "loss": 0.075, "theoretical_loss": 3.683426302580316, "tokens_seen": 983564288 }, { "epoch": 0.3, "learning_rate": 0.000708955223880597, "loss": 0.0761, "theoretical_loss": 3.683333328677356, "tokens_seen": 983826432 }, { "epoch": 0.3, "learning_rate": 0.0007088749799390147, "loss": 0.0706, "theoretical_loss": 3.6832403864787144, "tokens_seen": 984088576 }, { "epoch": 0.3, "learning_rate": 0.0007087947359974322, "loss": 0.0748, "theoretical_loss": 3.683147475965139, "tokens_seen": 984350720 }, { "epoch": 0.3, "learning_rate": 0.0007087144920558498, "loss": 0.0758, "theoretical_loss": 3.683054597117393, "tokens_seen": 984612864 }, { "epoch": 0.3, "learning_rate": 0.0007086342481142674, "loss": 0.0756, "theoretical_loss": 3.6829617499162595, "tokens_seen": 984875008 }, { "epoch": 0.3, "learning_rate": 0.000708554004172685, "loss": 0.075, "theoretical_loss": 3.6828689343425345, "tokens_seen": 985137152 }, { "epoch": 0.3, "learning_rate": 0.0007084737602311025, "loss": 0.074, "theoretical_loss": 3.682776150377034, "tokens_seen": 985399296 }, { "epoch": 0.3, "learning_rate": 0.0007083935162895202, "loss": 0.0762, "theoretical_loss": 3.682683398000589, "tokens_seen": 985661440 }, { "epoch": 0.3, "learning_rate": 0.0007083132723479377, "loss": 0.0771, "theoretical_loss": 3.6825906771940478, "tokens_seen": 985923584 }, { "epoch": 0.3, "learning_rate": 0.0007082330284063552, "loss": 0.0737, "theoretical_loss": 3.682497987938275, "tokens_seen": 986185728 }, { "epoch": 0.3, "learning_rate": 0.000708152784464773, "loss": 0.0731, "theoretical_loss": 3.682405330214153, "tokens_seen": 986447872 }, { "epoch": 0.3, "learning_rate": 0.0007080725405231905, "loss": 0.0716, "theoretical_loss": 3.682312704002579, "tokens_seen": 986710016 }, { "epoch": 0.3, "learning_rate": 0.0007079922965816082, "loss": 0.0761, "theoretical_loss": 3.6822201092844686, "tokens_seen": 986972160 }, { "epoch": 0.3, "learning_rate": 0.0007079120526400257, "loss": 0.0734, "theoretical_loss": 3.682127546040753, "tokens_seen": 987234304 }, { "epoch": 0.3, "learning_rate": 0.0007078318086984433, "loss": 0.075, "theoretical_loss": 3.6820350142523806, "tokens_seen": 987496448 }, { "epoch": 0.3, "learning_rate": 0.0007077515647568609, "loss": 0.0748, "theoretical_loss": 3.6819425139003155, "tokens_seen": 987758592 }, { "epoch": 0.3, "learning_rate": 0.0007076713208152784, "loss": 0.0761, "theoretical_loss": 3.6818500449655396, "tokens_seen": 988020736 }, { "epoch": 0.3, "learning_rate": 0.000707591076873696, "loss": 0.0722, "theoretical_loss": 3.6817576074290503, "tokens_seen": 988282880 }, { "epoch": 0.3, "learning_rate": 0.0007075108329321136, "loss": 0.0724, "theoretical_loss": 3.681665201271862, "tokens_seen": 988545024 }, { "epoch": 0.3, "learning_rate": 0.0007074305889905313, "loss": 0.0772, "theoretical_loss": 3.681572826475006, "tokens_seen": 988807168 }, { "epoch": 0.3, "learning_rate": 0.0007073503450489488, "loss": 0.0751, "theoretical_loss": 3.681480483019529, "tokens_seen": 989069312 }, { "epoch": 0.3, "learning_rate": 0.0007072701011073665, "loss": 0.0755, "theoretical_loss": 3.6813881708864953, "tokens_seen": 989331456 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0020013097673654556, "objective/train/docs_used": 363941, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4069637060165405, "objective/train/original_loss": 1.406963586807251, "objective/train/theoretical_loss": 3.681295890056985, "objective/train/tokens_used": 1010053600, "objective/train/value_avg": -0.00994873046875, "objective/train/value_loss": 0.00019873856217600405, "objective/train/value_max": -0.00013554096221923828, "objective/train/value_min": -0.890625, "objective/train/value_reward_corr": 0.7167314143110259, "objective/train/value_std": 0.01451873779296875, "objective/train/weight_avg": 1.0020908117294312, "objective/train/weighted_lm_loss": 1.4100981950759888, "objective/train/weights_max": 1.2156908512115479, "objective/train/weights_min": 0.36818236112594604, "theoretical_loss": 3.681295890056985, "tokens_seen": 989593600 }, { "epoch": 0.3, "learning_rate": 0.000707189857165784, "loss": 0.074, "theoretical_loss": 3.681295890056985, "tokens_seen": 989593600 }, { "epoch": 0.3, "learning_rate": 0.0007071096132242016, "loss": 0.0717, "theoretical_loss": 3.681203640512095, "tokens_seen": 989855744 }, { "epoch": 0.3, "learning_rate": 0.0007070293692826192, "loss": 0.0757, "theoretical_loss": 3.681111422232937, "tokens_seen": 990117888 }, { "epoch": 0.3, "learning_rate": 0.0007069491253410367, "loss": 0.0763, "theoretical_loss": 3.681019235200643, "tokens_seen": 990380032 }, { "epoch": 0.3, "learning_rate": 0.0007068688813994543, "loss": 0.0747, "theoretical_loss": 3.680927079396357, "tokens_seen": 990642176 }, { "epoch": 0.3, "learning_rate": 0.0007067886374578719, "loss": 0.0734, "theoretical_loss": 3.680834954801242, "tokens_seen": 990904320 }, { "epoch": 0.3, "learning_rate": 0.0007067083935162896, "loss": 0.0734, "theoretical_loss": 3.6807428613964763, "tokens_seen": 991166464 }, { "epoch": 0.3, "learning_rate": 0.0007066281495747072, "loss": 0.0777, "theoretical_loss": 3.6806507991632555, "tokens_seen": 991428608 }, { "epoch": 0.3, "learning_rate": 0.0007065479056331248, "loss": 0.0744, "theoretical_loss": 3.68055876808279, "tokens_seen": 991690752 }, { "epoch": 0.3, "learning_rate": 0.0007064676616915423, "loss": 0.079, "theoretical_loss": 3.680466768136308, "tokens_seen": 991952896 }, { "epoch": 0.3, "learning_rate": 0.0007063874177499599, "loss": 0.0758, "theoretical_loss": 3.680374799305053, "tokens_seen": 992215040 }, { "epoch": 0.3, "learning_rate": 0.0007063071738083775, "loss": 0.0742, "theoretical_loss": 3.6802828615702845, "tokens_seen": 992477184 }, { "epoch": 0.3, "learning_rate": 0.000706226929866795, "loss": 0.0764, "theoretical_loss": 3.6801909549132796, "tokens_seen": 992739328 }, { "epoch": 0.3, "learning_rate": 0.0007061466859252127, "loss": 0.0757, "theoretical_loss": 3.6800990793153305, "tokens_seen": 993001472 }, { "epoch": 0.3, "learning_rate": 0.0007060664419836302, "loss": 0.0749, "theoretical_loss": 3.6800072347577455, "tokens_seen": 993263616 }, { "epoch": 0.3, "learning_rate": 0.0007059861980420478, "loss": 0.0743, "theoretical_loss": 3.6799154212218506, "tokens_seen": 993525760 }, { "epoch": 0.3, "learning_rate": 0.0007059059541004655, "loss": 0.0726, "theoretical_loss": 3.679823638688985, "tokens_seen": 993787904 }, { "epoch": 0.3, "learning_rate": 0.000705825710158883, "loss": 0.0741, "theoretical_loss": 3.679731887140508, "tokens_seen": 994050048 }, { "epoch": 0.3, "learning_rate": 0.0007057454662173006, "loss": 0.071, "theoretical_loss": 3.6796401665577916, "tokens_seen": 994312192 }, { "epoch": 0.3, "learning_rate": 0.0007056652222757182, "loss": 0.0766, "theoretical_loss": 3.679548476922225, "tokens_seen": 994574336 }, { "epoch": 0.3, "learning_rate": 0.0007055849783341358, "loss": 0.0749, "theoretical_loss": 3.6794568182152143, "tokens_seen": 994836480 }, { "epoch": 0.3, "learning_rate": 0.0007055047343925533, "loss": 0.0756, "theoretical_loss": 3.6793651904181806, "tokens_seen": 995098624 }, { "epoch": 0.3, "learning_rate": 0.000705424490450971, "loss": 0.075, "theoretical_loss": 3.679273593512563, "tokens_seen": 995360768 }, { "epoch": 0.3, "learning_rate": 0.0007053442465093885, "loss": 0.0767, "theoretical_loss": 3.679182027479812, "tokens_seen": 995622912 }, { "epoch": 0.3, "learning_rate": 0.0007052640025678061, "loss": 0.0766, "theoretical_loss": 3.6790904923014005, "tokens_seen": 995885056 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.001141717773862183, "objective/train/docs_used": 366329, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4647668600082397, "objective/train/original_loss": 1.4647668600082397, "objective/train/theoretical_loss": 3.6789989879588125, "objective/train/tokens_used": 1016607200, "objective/train/value_avg": -0.00992584228515625, "objective/train/value_loss": 0.00031617286731489, "objective/train/value_max": -0.00011414289474487305, "objective/train/value_min": -0.49951171875, "objective/train/value_reward_corr": 0.7429311902922219, "objective/train/value_std": 0.0218963623046875, "objective/train/weight_avg": 1.0012768507003784, "objective/train/weighted_lm_loss": 1.465766429901123, "objective/train/weights_max": 1.3314474821090698, "objective/train/weights_min": 0.2260430008172989, "theoretical_loss": 3.6789989879588125, "tokens_seen": 996147200 }, { "epoch": 0.3, "learning_rate": 0.0007051837586262238, "loss": 0.0741, "theoretical_loss": 3.6789989879588125, "tokens_seen": 996147200 }, { "epoch": 0.3, "learning_rate": 0.0007051035146846413, "loss": 0.0743, "theoretical_loss": 3.6789075144335497, "tokens_seen": 996409344 }, { "epoch": 0.3, "learning_rate": 0.000705023270743059, "loss": 0.073, "theoretical_loss": 3.6788160717071303, "tokens_seen": 996671488 }, { "epoch": 0.3, "learning_rate": 0.0007049430268014765, "loss": 0.0771, "theoretical_loss": 3.678724659761087, "tokens_seen": 996933632 }, { "epoch": 0.3, "learning_rate": 0.0007048627828598941, "loss": 0.0732, "theoretical_loss": 3.6786332785769695, "tokens_seen": 997195776 }, { "epoch": 0.3, "learning_rate": 0.0007047825389183117, "loss": 0.0734, "theoretical_loss": 3.678541928136344, "tokens_seen": 997457920 }, { "epoch": 0.3, "learning_rate": 0.0007047022949767292, "loss": 0.0757, "theoretical_loss": 3.6784506084207904, "tokens_seen": 997720064 }, { "epoch": 0.3, "learning_rate": 0.0007046220510351468, "loss": 0.0779, "theoretical_loss": 3.6783593194119066, "tokens_seen": 997982208 }, { "epoch": 0.3, "learning_rate": 0.0007045418070935644, "loss": 0.076, "theoretical_loss": 3.6782680610913054, "tokens_seen": 998244352 }, { "epoch": 0.3, "learning_rate": 0.0007044615631519821, "loss": 0.079, "theoretical_loss": 3.6781768334406157, "tokens_seen": 998506496 }, { "epoch": 0.3, "learning_rate": 0.0007043813192103996, "loss": 0.0736, "theoretical_loss": 3.678085636441482, "tokens_seen": 998768640 }, { "epoch": 0.3, "learning_rate": 0.0007043010752688173, "loss": 0.0782, "theoretical_loss": 3.677994470075565, "tokens_seen": 999030784 }, { "epoch": 0.3, "learning_rate": 0.0007042208313272348, "loss": 0.0749, "theoretical_loss": 3.6779033343245406, "tokens_seen": 999292928 }, { "epoch": 0.3, "learning_rate": 0.0007041405873856524, "loss": 0.0755, "theoretical_loss": 3.677812229170101, "tokens_seen": 999555072 }, { "epoch": 0.3, "learning_rate": 0.00070406034344407, "loss": 0.0731, "theoretical_loss": 3.677721154593953, "tokens_seen": 999817216 }, { "epoch": 0.3, "learning_rate": 0.0007039800995024875, "loss": 0.0756, "theoretical_loss": 3.6776301105778213, "tokens_seen": 1000079360 }, { "epoch": 0.3, "learning_rate": 0.0007038998555609052, "loss": 0.0759, "theoretical_loss": 3.6775390971034447, "tokens_seen": 1000341504 }, { "epoch": 0.3, "learning_rate": 0.0007038196116193227, "loss": 0.0742, "theoretical_loss": 3.6774481141525777, "tokens_seen": 1000603648 }, { "epoch": 0.3, "learning_rate": 0.0007037393676777404, "loss": 0.075, "theoretical_loss": 3.6773571617069907, "tokens_seen": 1000865792 }, { "epoch": 0.3, "learning_rate": 0.000703659123736158, "loss": 0.0729, "theoretical_loss": 3.6772662397484703, "tokens_seen": 1001127936 }, { "epoch": 0.3, "learning_rate": 0.0007035788797945756, "loss": 0.0705, "theoretical_loss": 3.6771753482588183, "tokens_seen": 1001390080 }, { "epoch": 0.3, "learning_rate": 0.0007034986358529931, "loss": 0.0741, "theoretical_loss": 3.6770844872198523, "tokens_seen": 1001652224 }, { "epoch": 0.3, "learning_rate": 0.0007034183919114107, "loss": 0.0733, "theoretical_loss": 3.6769936566134045, "tokens_seen": 1001914368 }, { "epoch": 0.3, "learning_rate": 0.0007033381479698283, "loss": 0.0756, "theoretical_loss": 3.676902856421324, "tokens_seen": 1002176512 }, { "epoch": 0.3, "learning_rate": 0.0007032579040282458, "loss": 0.0758, "theoretical_loss": 3.6768120866254757, "tokens_seen": 1002438656 }, { "epoch": 0.3, "objective/train/advantage_avg": -1.4584275049855933e-05, "objective/train/docs_used": 368716, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5315086841583252, "objective/train/original_loss": 1.5315088033676147, "objective/train/theoretical_loss": 3.6767213472077387, "objective/train/tokens_used": 1023160800, "objective/train/value_avg": -0.0098419189453125, "objective/train/value_loss": 0.0005082013085484505, "objective/train/value_max": -9.244680404663086e-05, "objective/train/value_min": -0.75244140625, "objective/train/value_reward_corr": 0.6553984384647921, "objective/train/value_std": 0.019561767578125, "objective/train/weight_avg": 1.0001962184906006, "objective/train/weighted_lm_loss": 1.5310980081558228, "objective/train/weights_max": 1.727001667022705, "objective/train/weights_min": 0.06900446861982346, "theoretical_loss": 3.6767213472077387, "tokens_seen": 1002700800 }, { "epoch": 0.3, "learning_rate": 0.0007031776600866635, "loss": 0.0743, "theoretical_loss": 3.6767213472077387, "tokens_seen": 1002700800 }, { "epoch": 0.3, "learning_rate": 0.000703097416145081, "loss": 0.0729, "theoretical_loss": 3.676630638150008, "tokens_seen": 1002962944 }, { "epoch": 0.3, "learning_rate": 0.0007030171722034986, "loss": 0.072, "theoretical_loss": 3.6765399594341943, "tokens_seen": 1003225088 }, { "epoch": 0.3, "learning_rate": 0.0007029369282619163, "loss": 0.0758, "theoretical_loss": 3.676449311042225, "tokens_seen": 1003487232 }, { "epoch": 0.3, "learning_rate": 0.0007028566843203338, "loss": 0.0788, "theoretical_loss": 3.6763586929560415, "tokens_seen": 1003749376 }, { "epoch": 0.3, "learning_rate": 0.0007027764403787515, "loss": 0.0757, "theoretical_loss": 3.6762681051576003, "tokens_seen": 1004011520 }, { "epoch": 0.3, "learning_rate": 0.000702696196437169, "loss": 0.074, "theoretical_loss": 3.6761775476288747, "tokens_seen": 1004273664 }, { "epoch": 0.3, "learning_rate": 0.0007026159524955866, "loss": 0.0751, "theoretical_loss": 3.6760870203518525, "tokens_seen": 1004535808 }, { "epoch": 0.3, "learning_rate": 0.0007025357085540042, "loss": 0.0715, "theoretical_loss": 3.6759965233085383, "tokens_seen": 1004797952 }, { "epoch": 0.3, "learning_rate": 0.0007024554646124218, "loss": 0.0748, "theoretical_loss": 3.6759060564809496, "tokens_seen": 1005060096 }, { "epoch": 0.3, "learning_rate": 0.0007023752206708393, "loss": 0.0749, "theoretical_loss": 3.6758156198511216, "tokens_seen": 1005322240 }, { "epoch": 0.3, "learning_rate": 0.0007022949767292569, "loss": 0.0759, "theoretical_loss": 3.675725213401104, "tokens_seen": 1005584384 }, { "epoch": 0.3, "learning_rate": 0.0007022147327876746, "loss": 0.0758, "theoretical_loss": 3.6756348371129617, "tokens_seen": 1005846528 }, { "epoch": 0.3, "learning_rate": 0.0007021344888460921, "loss": 0.0775, "theoretical_loss": 3.6755444909687744, "tokens_seen": 1006108672 }, { "epoch": 0.3, "learning_rate": 0.0007020542449045098, "loss": 0.0745, "theoretical_loss": 3.675454174950639, "tokens_seen": 1006370816 }, { "epoch": 0.31, "learning_rate": 0.0007019740009629273, "loss": 0.0733, "theoretical_loss": 3.675363889040666, "tokens_seen": 1006632960 }, { "epoch": 0.31, "learning_rate": 0.0007018937570213449, "loss": 0.075, "theoretical_loss": 3.675273633220981, "tokens_seen": 1006895104 }, { "epoch": 0.31, "learning_rate": 0.0007018135130797625, "loss": 0.0745, "theoretical_loss": 3.6751834074737264, "tokens_seen": 1007157248 }, { "epoch": 0.31, "learning_rate": 0.00070173326913818, "loss": 0.0761, "theoretical_loss": 3.675093211781059, "tokens_seen": 1007419392 }, { "epoch": 0.31, "learning_rate": 0.0007016530251965977, "loss": 0.0773, "theoretical_loss": 3.67500304612515, "tokens_seen": 1007681536 }, { "epoch": 0.31, "learning_rate": 0.0007015727812550152, "loss": 0.0757, "theoretical_loss": 3.674912910488187, "tokens_seen": 1007943680 }, { "epoch": 0.31, "learning_rate": 0.0007014925373134329, "loss": 0.076, "theoretical_loss": 3.6748228048523726, "tokens_seen": 1008205824 }, { "epoch": 0.31, "learning_rate": 0.0007014122933718505, "loss": 0.0784, "theoretical_loss": 3.674732729199924, "tokens_seen": 1008467968 }, { "epoch": 0.31, "learning_rate": 0.0007013320494302681, "loss": 0.0741, "theoretical_loss": 3.674642683513074, "tokens_seen": 1008730112 }, { "epoch": 0.31, "learning_rate": 0.0007012518054886856, "loss": 0.0764, "theoretical_loss": 3.674552667774071, "tokens_seen": 1008992256 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.0006658255588263273, "objective/train/docs_used": 370943, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.50751793384552, "objective/train/original_loss": 1.50751793384552, "objective/train/theoretical_loss": 3.6744626819651773, "objective/train/tokens_used": 1029714400, "objective/train/value_avg": -0.007167816162109375, "objective/train/value_loss": 0.00024274643510580063, "objective/train/value_max": -0.000110626220703125, "objective/train/value_min": -0.354736328125, "objective/train/value_reward_corr": 0.6235155171541196, "objective/train/value_std": 0.01125335693359375, "objective/train/weight_avg": 1.0007777214050293, "objective/train/weighted_lm_loss": 1.5075055360794067, "objective/train/weights_max": 1.4258047342300415, "objective/train/weights_min": 0.39044809341430664, "theoretical_loss": 3.6744626819651773, "tokens_seen": 1009254400 }, { "epoch": 0.31, "learning_rate": 0.0007011715615471032, "loss": 0.073, "theoretical_loss": 3.6744626819651773, "tokens_seen": 1009254400 }, { "epoch": 0.31, "learning_rate": 0.0007010913176055208, "loss": 0.0745, "theoretical_loss": 3.674372726068671, "tokens_seen": 1009516544 }, { "epoch": 0.31, "learning_rate": 0.0007010110736639383, "loss": 0.0737, "theoretical_loss": 3.6742828000668464, "tokens_seen": 1009778688 }, { "epoch": 0.31, "learning_rate": 0.000700930829722356, "loss": 0.0745, "theoretical_loss": 3.6741929039420103, "tokens_seen": 1010040832 }, { "epoch": 0.31, "learning_rate": 0.0007008505857807735, "loss": 0.0739, "theoretical_loss": 3.6741030376764865, "tokens_seen": 1010302976 }, { "epoch": 0.31, "learning_rate": 0.0007007703418391912, "loss": 0.0764, "theoretical_loss": 3.674013201252614, "tokens_seen": 1010565120 }, { "epoch": 0.31, "learning_rate": 0.0007006900978976088, "loss": 0.0749, "theoretical_loss": 3.6739233946527454, "tokens_seen": 1010827264 }, { "epoch": 0.31, "learning_rate": 0.0007006098539560264, "loss": 0.0727, "theoretical_loss": 3.6738336178592492, "tokens_seen": 1011089408 }, { "epoch": 0.31, "learning_rate": 0.0007005296100144439, "loss": 0.0792, "theoretical_loss": 3.6737438708545094, "tokens_seen": 1011351552 }, { "epoch": 0.31, "learning_rate": 0.0007004493660728615, "loss": 0.0749, "theoretical_loss": 3.673654153620924, "tokens_seen": 1011613696 }, { "epoch": 0.31, "learning_rate": 0.0007003691221312791, "loss": 0.0782, "theoretical_loss": 3.673564466140906, "tokens_seen": 1011875840 }, { "epoch": 0.31, "learning_rate": 0.0007002888781896967, "loss": 0.0746, "theoretical_loss": 3.6734748083968842, "tokens_seen": 1012137984 }, { "epoch": 0.31, "learning_rate": 0.0007002086342481143, "loss": 0.076, "theoretical_loss": 3.6733851803713016, "tokens_seen": 1012400128 }, { "epoch": 0.31, "learning_rate": 0.0007001283903065318, "loss": 0.0759, "theoretical_loss": 3.673295582046616, "tokens_seen": 1012662272 }, { "epoch": 0.31, "learning_rate": 0.0007000481463649496, "loss": 0.0755, "theoretical_loss": 3.6732060134053013, "tokens_seen": 1012924416 }, { "epoch": 0.31, "learning_rate": 0.0006999679024233671, "loss": 0.076, "theoretical_loss": 3.673116474429844, "tokens_seen": 1013186560 }, { "epoch": 0.31, "learning_rate": 0.0006998876584817846, "loss": 0.0775, "theoretical_loss": 3.673026965102748, "tokens_seen": 1013448704 }, { "epoch": 0.31, "learning_rate": 0.0006998074145402023, "loss": 0.0738, "theoretical_loss": 3.67293748540653, "tokens_seen": 1013710848 }, { "epoch": 0.31, "learning_rate": 0.0006997271705986198, "loss": 0.0737, "theoretical_loss": 3.672848035323723, "tokens_seen": 1013972992 }, { "epoch": 0.31, "learning_rate": 0.0006996469266570374, "loss": 0.0745, "theoretical_loss": 3.6727586148368743, "tokens_seen": 1014235136 }, { "epoch": 0.31, "learning_rate": 0.000699566682715455, "loss": 0.0753, "theoretical_loss": 3.672669223928545, "tokens_seen": 1014497280 }, { "epoch": 0.31, "learning_rate": 0.0006994864387738726, "loss": 0.0744, "theoretical_loss": 3.672579862581313, "tokens_seen": 1014759424 }, { "epoch": 0.31, "learning_rate": 0.0006994061948322901, "loss": 0.0746, "theoretical_loss": 3.672490530777769, "tokens_seen": 1015021568 }, { "epoch": 0.31, "learning_rate": 0.0006993259508907077, "loss": 0.0751, "theoretical_loss": 3.6724012285005196, "tokens_seen": 1015283712 }, { "epoch": 0.31, "learning_rate": 0.0006992457069491254, "loss": 0.0763, "theoretical_loss": 3.6723119557321864, "tokens_seen": 1015545856 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.0006244329269975424, "objective/train/docs_used": 372984, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4833120107650757, "objective/train/original_loss": 1.4833121299743652, "objective/train/theoretical_loss": 3.6722227124554045, "objective/train/tokens_used": 1036268000, "objective/train/value_avg": -0.007251739501953125, "objective/train/value_loss": 0.00037429071380756795, "objective/train/value_max": -8.094310760498047e-05, "objective/train/value_min": -0.8642578125, "objective/train/value_reward_corr": 0.6612852682359046, "objective/train/value_std": 0.01514434814453125, "objective/train/weight_avg": 1.0007858276367188, "objective/train/weighted_lm_loss": 1.484993577003479, "objective/train/weights_max": 1.7426632642745972, "objective/train/weights_min": 0.3694700598716736, "theoretical_loss": 3.6722227124554045, "tokens_seen": 1015808000 }, { "epoch": 0.31, "learning_rate": 0.0006991654630075429, "loss": 0.0756, "theoretical_loss": 3.6722227124554045, "tokens_seen": 1015808000 }, { "epoch": 0.31, "learning_rate": 0.0006990852190659606, "loss": 0.0727, "theoretical_loss": 3.6721334986528236, "tokens_seen": 1016070144 }, { "epoch": 0.31, "learning_rate": 0.0006990049751243781, "loss": 0.0766, "theoretical_loss": 3.6720443143071106, "tokens_seen": 1016332288 }, { "epoch": 0.31, "learning_rate": 0.0006989247311827958, "loss": 0.0743, "theoretical_loss": 3.671955159400943, "tokens_seen": 1016594432 }, { "epoch": 0.31, "learning_rate": 0.0006988444872412133, "loss": 0.0763, "theoretical_loss": 3.6718660339170173, "tokens_seen": 1016856576 }, { "epoch": 0.31, "learning_rate": 0.0006987642432996308, "loss": 0.0758, "theoretical_loss": 3.6717769378380414, "tokens_seen": 1017118720 }, { "epoch": 0.31, "learning_rate": 0.0006986839993580485, "loss": 0.0748, "theoretical_loss": 3.671687871146739, "tokens_seen": 1017380864 }, { "epoch": 0.31, "learning_rate": 0.000698603755416466, "loss": 0.0731, "theoretical_loss": 3.6715988338258487, "tokens_seen": 1017643008 }, { "epoch": 0.31, "learning_rate": 0.0006985235114748837, "loss": 0.0745, "theoretical_loss": 3.6715098258581236, "tokens_seen": 1017905152 }, { "epoch": 0.31, "learning_rate": 0.0006984432675333013, "loss": 0.0761, "theoretical_loss": 3.6714208472263303, "tokens_seen": 1018167296 }, { "epoch": 0.31, "learning_rate": 0.0006983630235917189, "loss": 0.0738, "theoretical_loss": 3.6713318979132517, "tokens_seen": 1018429440 }, { "epoch": 0.31, "learning_rate": 0.0006982827796501364, "loss": 0.0752, "theoretical_loss": 3.671242977901683, "tokens_seen": 1018691584 }, { "epoch": 0.31, "learning_rate": 0.000698202535708554, "loss": 0.0762, "theoretical_loss": 3.671154087174436, "tokens_seen": 1018953728 }, { "epoch": 0.31, "learning_rate": 0.0006981222917669716, "loss": 0.0762, "theoretical_loss": 3.6710652257143366, "tokens_seen": 1019215872 }, { "epoch": 0.31, "learning_rate": 0.0006980420478253891, "loss": 0.0755, "theoretical_loss": 3.6709763935042243, "tokens_seen": 1019478016 }, { "epoch": 0.31, "learning_rate": 0.0006979618038838068, "loss": 0.0759, "theoretical_loss": 3.670887590526953, "tokens_seen": 1019740160 }, { "epoch": 0.31, "learning_rate": 0.0006978815599422243, "loss": 0.0753, "theoretical_loss": 3.6707988167653927, "tokens_seen": 1020002304 }, { "epoch": 0.31, "learning_rate": 0.0006978013160006421, "loss": 0.0749, "theoretical_loss": 3.670710072202426, "tokens_seen": 1020264448 }, { "epoch": 0.31, "learning_rate": 0.0006977210720590596, "loss": 0.0762, "theoretical_loss": 3.670621356820951, "tokens_seen": 1020526592 }, { "epoch": 0.31, "learning_rate": 0.0006976408281174772, "loss": 0.0736, "theoretical_loss": 3.67053267060388, "tokens_seen": 1020788736 }, { "epoch": 0.31, "learning_rate": 0.0006975605841758948, "loss": 0.0761, "theoretical_loss": 3.6704440135341394, "tokens_seen": 1021050880 }, { "epoch": 0.31, "learning_rate": 0.0006974803402343123, "loss": 0.0755, "theoretical_loss": 3.6703553855946702, "tokens_seen": 1021313024 }, { "epoch": 0.31, "learning_rate": 0.0006974000962927299, "loss": 0.0732, "theoretical_loss": 3.6702667867684275, "tokens_seen": 1021575168 }, { "epoch": 0.31, "learning_rate": 0.0006973198523511475, "loss": 0.0725, "theoretical_loss": 3.670178217038381, "tokens_seen": 1021837312 }, { "epoch": 0.31, "learning_rate": 0.0006972396084095651, "loss": 0.0737, "theoretical_loss": 3.670089676387515, "tokens_seen": 1022099456 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.000926980166696012, "objective/train/docs_used": 375283, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5110857486724854, "objective/train/original_loss": 1.5110857486724854, "objective/train/theoretical_loss": 3.6700011647988275, "objective/train/tokens_used": 1042821600, "objective/train/value_avg": -0.01114654541015625, "objective/train/value_loss": 0.00015908897330518812, "objective/train/value_max": -9.608268737792969e-05, "objective/train/value_min": -0.3095703125, "objective/train/value_reward_corr": 0.9276620899819128, "objective/train/value_std": 0.02862548828125, "objective/train/weight_avg": 1.0010052919387817, "objective/train/weighted_lm_loss": 1.5121495723724365, "objective/train/weights_max": 1.142175316810608, "objective/train/weights_min": 0.7194867134094238, "theoretical_loss": 3.6700011647988275, "tokens_seen": 1022361600 }, { "epoch": 0.31, "learning_rate": 0.0006971593644679826, "loss": 0.0772, "theoretical_loss": 3.6700011647988275, "tokens_seen": 1022361600 }, { "epoch": 0.31, "learning_rate": 0.0006970791205264004, "loss": 0.0725, "theoretical_loss": 3.6699126822553314, "tokens_seen": 1022623744 }, { "epoch": 0.31, "learning_rate": 0.0006969988765848179, "loss": 0.0735, "theoretical_loss": 3.669824228740053, "tokens_seen": 1022885888 }, { "epoch": 0.31, "learning_rate": 0.0006969186326432354, "loss": 0.0773, "theoretical_loss": 3.6697358042360344, "tokens_seen": 1023148032 }, { "epoch": 0.31, "learning_rate": 0.0006968383887016531, "loss": 0.0755, "theoretical_loss": 3.6696474087263296, "tokens_seen": 1023410176 }, { "epoch": 0.31, "learning_rate": 0.0006967581447600706, "loss": 0.0773, "theoretical_loss": 3.6695590421940096, "tokens_seen": 1023672320 }, { "epoch": 0.31, "learning_rate": 0.0006966779008184882, "loss": 0.076, "theoretical_loss": 3.6694707046221575, "tokens_seen": 1023934464 }, { "epoch": 0.31, "learning_rate": 0.0006965976568769058, "loss": 0.074, "theoretical_loss": 3.669382395993871, "tokens_seen": 1024196608 }, { "epoch": 0.31, "learning_rate": 0.0006965174129353234, "loss": 0.0753, "theoretical_loss": 3.669294116292263, "tokens_seen": 1024458752 }, { "epoch": 0.31, "learning_rate": 0.000696437168993741, "loss": 0.0748, "theoretical_loss": 3.6692058655004605, "tokens_seen": 1024720896 }, { "epoch": 0.31, "learning_rate": 0.0006963569250521585, "loss": 0.0766, "theoretical_loss": 3.669117643601602, "tokens_seen": 1024983040 }, { "epoch": 0.31, "learning_rate": 0.0006962766811105762, "loss": 0.0718, "theoretical_loss": 3.6690294505788446, "tokens_seen": 1025245184 }, { "epoch": 0.31, "learning_rate": 0.0006961964371689938, "loss": 0.0737, "theoretical_loss": 3.668941286415355, "tokens_seen": 1025507328 }, { "epoch": 0.31, "learning_rate": 0.0006961161932274114, "loss": 0.0729, "theoretical_loss": 3.668853151094318, "tokens_seen": 1025769472 }, { "epoch": 0.31, "learning_rate": 0.0006960359492858289, "loss": 0.0727, "theoretical_loss": 3.6687650445989295, "tokens_seen": 1026031616 }, { "epoch": 0.31, "learning_rate": 0.0006959557053442466, "loss": 0.0731, "theoretical_loss": 3.6686769669124004, "tokens_seen": 1026293760 }, { "epoch": 0.31, "learning_rate": 0.0006958754614026641, "loss": 0.0744, "theoretical_loss": 3.6685889180179565, "tokens_seen": 1026555904 }, { "epoch": 0.31, "learning_rate": 0.0006957952174610816, "loss": 0.0741, "theoretical_loss": 3.668500897898837, "tokens_seen": 1026818048 }, { "epoch": 0.31, "learning_rate": 0.0006957149735194993, "loss": 0.0753, "theoretical_loss": 3.668412906538295, "tokens_seen": 1027080192 }, { "epoch": 0.31, "learning_rate": 0.0006956347295779168, "loss": 0.076, "theoretical_loss": 3.6683249439195977, "tokens_seen": 1027342336 }, { "epoch": 0.31, "learning_rate": 0.0006955544856363345, "loss": 0.0751, "theoretical_loss": 3.668237010026026, "tokens_seen": 1027604480 }, { "epoch": 0.31, "learning_rate": 0.0006954742416947521, "loss": 0.0752, "theoretical_loss": 3.668149104840876, "tokens_seen": 1027866624 }, { "epoch": 0.31, "learning_rate": 0.0006953939977531697, "loss": 0.0747, "theoretical_loss": 3.6680612283474567, "tokens_seen": 1028128768 }, { "epoch": 0.31, "learning_rate": 0.0006953137538115872, "loss": 0.0742, "theoretical_loss": 3.667973380529091, "tokens_seen": 1028390912 }, { "epoch": 0.31, "learning_rate": 0.0006952335098700048, "loss": 0.0743, "theoretical_loss": 3.6678855613691157, "tokens_seen": 1028653056 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.0015299071092158556, "objective/train/docs_used": 377682, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4341442584991455, "objective/train/original_loss": 1.4341442584991455, "objective/train/theoretical_loss": 3.667797770850883, "objective/train/tokens_used": 1049375200, "objective/train/value_avg": -0.00940704345703125, "objective/train/value_loss": 0.00021609137183986604, "objective/train/value_max": -0.0001214742660522461, "objective/train/value_min": -0.354248046875, "objective/train/value_reward_corr": 0.8563500147459093, "objective/train/value_std": 0.0274658203125, "objective/train/weight_avg": 1.0016289949417114, "objective/train/weighted_lm_loss": 1.4355690479278564, "objective/train/weights_max": 1.3001129627227783, "objective/train/weights_min": 0.36912843585014343, "theoretical_loss": 3.667797770850883, "tokens_seen": 1028915200 }, { "epoch": 0.31, "learning_rate": 0.0006951532659284224, "loss": 0.0732, "theoretical_loss": 3.667797770850883, "tokens_seen": 1028915200 }, { "epoch": 0.31, "learning_rate": 0.00069507302198684, "loss": 0.0749, "theoretical_loss": 3.667710008957756, "tokens_seen": 1029177344 }, { "epoch": 0.31, "learning_rate": 0.0006949927780452576, "loss": 0.0775, "theoretical_loss": 3.667622275673115, "tokens_seen": 1029439488 }, { "epoch": 0.31, "learning_rate": 0.0006949125341036751, "loss": 0.0747, "theoretical_loss": 3.667534570980353, "tokens_seen": 1029701632 }, { "epoch": 0.31, "learning_rate": 0.0006948322901620929, "loss": 0.0761, "theoretical_loss": 3.667446894862876, "tokens_seen": 1029963776 }, { "epoch": 0.31, "learning_rate": 0.0006947520462205104, "loss": 0.073, "theoretical_loss": 3.667359247304104, "tokens_seen": 1030225920 }, { "epoch": 0.31, "learning_rate": 0.0006946718022789279, "loss": 0.0739, "theoretical_loss": 3.667271628287472, "tokens_seen": 1030488064 }, { "epoch": 0.31, "learning_rate": 0.0006945915583373456, "loss": 0.0751, "theoretical_loss": 3.6671840377964275, "tokens_seen": 1030750208 }, { "epoch": 0.31, "learning_rate": 0.0006945113143957631, "loss": 0.0754, "theoretical_loss": 3.667096475814433, "tokens_seen": 1031012352 }, { "epoch": 0.31, "learning_rate": 0.0006944310704541807, "loss": 0.0746, "theoretical_loss": 3.6670089423249643, "tokens_seen": 1031274496 }, { "epoch": 0.31, "learning_rate": 0.0006943508265125983, "loss": 0.0784, "theoretical_loss": 3.6669214373115104, "tokens_seen": 1031536640 }, { "epoch": 0.31, "learning_rate": 0.0006942705825710159, "loss": 0.0745, "theoretical_loss": 3.6668339607575744, "tokens_seen": 1031798784 }, { "epoch": 0.31, "learning_rate": 0.0006941903386294334, "loss": 0.0726, "theoretical_loss": 3.6667465126466743, "tokens_seen": 1032060928 }, { "epoch": 0.31, "learning_rate": 0.0006941100946878512, "loss": 0.0708, "theoretical_loss": 3.6666590929623393, "tokens_seen": 1032323072 }, { "epoch": 0.31, "learning_rate": 0.0006940298507462687, "loss": 0.0717, "theoretical_loss": 3.666571701688115, "tokens_seen": 1032585216 }, { "epoch": 0.31, "learning_rate": 0.0006939496068046863, "loss": 0.0767, "theoretical_loss": 3.6664843388075594, "tokens_seen": 1032847360 }, { "epoch": 0.31, "learning_rate": 0.0006938693628631039, "loss": 0.0705, "theoretical_loss": 3.6663970043042435, "tokens_seen": 1033109504 }, { "epoch": 0.31, "learning_rate": 0.0006937891189215214, "loss": 0.0717, "theoretical_loss": 3.6663096981617533, "tokens_seen": 1033371648 }, { "epoch": 0.31, "learning_rate": 0.0006937088749799391, "loss": 0.0716, "theoretical_loss": 3.6662224203636886, "tokens_seen": 1033633792 }, { "epoch": 0.31, "learning_rate": 0.0006936286310383566, "loss": 0.0744, "theoretical_loss": 3.6661351708936616, "tokens_seen": 1033895936 }, { "epoch": 0.31, "learning_rate": 0.0006935483870967742, "loss": 0.0693, "theoretical_loss": 3.6660479497352982, "tokens_seen": 1034158080 }, { "epoch": 0.31, "learning_rate": 0.0006934681431551918, "loss": 0.0753, "theoretical_loss": 3.665960756872239, "tokens_seen": 1034420224 }, { "epoch": 0.31, "learning_rate": 0.0006933878992136093, "loss": 0.0745, "theoretical_loss": 3.6658735922881376, "tokens_seen": 1034682368 }, { "epoch": 0.31, "learning_rate": 0.000693307655272027, "loss": 0.0744, "theoretical_loss": 3.665786455966661, "tokens_seen": 1034944512 }, { "epoch": 0.31, "learning_rate": 0.0006932274113304446, "loss": 0.0747, "theoretical_loss": 3.6656993478914903, "tokens_seen": 1035206656 }, { "epoch": 0.31, "objective/train/advantage_avg": -7.297027332242578e-05, "objective/train/docs_used": 379908, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4424421787261963, "objective/train/original_loss": 1.4424422979354858, "objective/train/theoretical_loss": 3.6656122680463197, "objective/train/tokens_used": 1055928800, "objective/train/value_avg": -0.00970458984375, "objective/train/value_loss": 0.0001646455639274791, "objective/train/value_max": -6.711483001708984e-05, "objective/train/value_min": -0.310302734375, "objective/train/value_reward_corr": 0.9264396814702418, "objective/train/value_std": 0.0283050537109375, "objective/train/weight_avg": 1.0000029802322388, "objective/train/weighted_lm_loss": 1.4421523809432983, "objective/train/weights_max": 1.2352874279022217, "objective/train/weights_min": 0.3707435429096222, "theoretical_loss": 3.6656122680463197, "tokens_seen": 1035468800 }, { "epoch": 0.31, "learning_rate": 0.0006931471673888622, "loss": 0.0754, "theoretical_loss": 3.6656122680463197, "tokens_seen": 1035468800 }, { "epoch": 0.31, "learning_rate": 0.0006930669234472797, "loss": 0.0729, "theoretical_loss": 3.6655252164148564, "tokens_seen": 1035730944 }, { "epoch": 0.31, "learning_rate": 0.0006929866795056974, "loss": 0.0758, "theoretical_loss": 3.6654381929808233, "tokens_seen": 1035993088 }, { "epoch": 0.31, "learning_rate": 0.0006929064355641149, "loss": 0.0735, "theoretical_loss": 3.6653511977279534, "tokens_seen": 1036255232 }, { "epoch": 0.31, "learning_rate": 0.0006928261916225324, "loss": 0.0746, "theoretical_loss": 3.6652642306399965, "tokens_seen": 1036517376 }, { "epoch": 0.31, "learning_rate": 0.0006927459476809501, "loss": 0.0749, "theoretical_loss": 3.6651772917007137, "tokens_seen": 1036779520 }, { "epoch": 0.31, "learning_rate": 0.0006926657037393676, "loss": 0.0754, "theoretical_loss": 3.6650903808938806, "tokens_seen": 1037041664 }, { "epoch": 0.31, "learning_rate": 0.0006925854597977854, "loss": 0.0752, "theoretical_loss": 3.6650034982032857, "tokens_seen": 1037303808 }, { "epoch": 0.31, "learning_rate": 0.0006925052158562029, "loss": 0.0734, "theoretical_loss": 3.664916643612732, "tokens_seen": 1037565952 }, { "epoch": 0.31, "learning_rate": 0.0006924249719146205, "loss": 0.0729, "theoretical_loss": 3.6648298171060345, "tokens_seen": 1037828096 }, { "epoch": 0.31, "learning_rate": 0.0006923447279730381, "loss": 0.0726, "theoretical_loss": 3.6647430186670222, "tokens_seen": 1038090240 }, { "epoch": 0.31, "learning_rate": 0.0006922644840314556, "loss": 0.0734, "theoretical_loss": 3.6646562482795373, "tokens_seen": 1038352384 }, { "epoch": 0.31, "learning_rate": 0.0006921842400898732, "loss": 0.0721, "theoretical_loss": 3.664569505927436, "tokens_seen": 1038614528 }, { "epoch": 0.31, "learning_rate": 0.0006921039961482908, "loss": 0.0736, "theoretical_loss": 3.664482791594588, "tokens_seen": 1038876672 }, { "epoch": 0.31, "learning_rate": 0.0006920237522067084, "loss": 0.0738, "theoretical_loss": 3.664396105264875, "tokens_seen": 1039138816 }, { "epoch": 0.31, "learning_rate": 0.0006919435082651259, "loss": 0.0735, "theoretical_loss": 3.6643094469221933, "tokens_seen": 1039400960 }, { "epoch": 0.32, "learning_rate": 0.0006918632643235437, "loss": 0.0734, "theoretical_loss": 3.664222816550452, "tokens_seen": 1039663104 }, { "epoch": 0.32, "learning_rate": 0.0006917830203819612, "loss": 0.0723, "theoretical_loss": 3.6641362141335727, "tokens_seen": 1039925248 }, { "epoch": 0.32, "learning_rate": 0.0006917027764403787, "loss": 0.0731, "theoretical_loss": 3.6640496396554925, "tokens_seen": 1040187392 }, { "epoch": 0.32, "learning_rate": 0.0006916225324987964, "loss": 0.0753, "theoretical_loss": 3.6639630931001594, "tokens_seen": 1040449536 }, { "epoch": 0.32, "learning_rate": 0.0006915422885572139, "loss": 0.0737, "theoretical_loss": 3.6638765744515367, "tokens_seen": 1040711680 }, { "epoch": 0.32, "learning_rate": 0.0006914620446156316, "loss": 0.076, "theoretical_loss": 3.6637900836935993, "tokens_seen": 1040973824 }, { "epoch": 0.32, "learning_rate": 0.0006913818006740491, "loss": 0.0736, "theoretical_loss": 3.6637036208103364, "tokens_seen": 1041235968 }, { "epoch": 0.32, "learning_rate": 0.0006913015567324667, "loss": 0.0711, "theoretical_loss": 3.663617185785749, "tokens_seen": 1041498112 }, { "epoch": 0.32, "learning_rate": 0.0006912213127908843, "loss": 0.0724, "theoretical_loss": 3.6635307786038536, "tokens_seen": 1041760256 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.0013375055277720094, "objective/train/docs_used": 382133, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4263135194778442, "objective/train/original_loss": 1.4263136386871338, "objective/train/theoretical_loss": 3.663444399248678, "objective/train/tokens_used": 1062482400, "objective/train/value_avg": -0.006931304931640625, "objective/train/value_loss": 0.00021545964409597218, "objective/train/value_max": -0.00010150671005249023, "objective/train/value_min": -0.296875, "objective/train/value_reward_corr": 0.5908098049024169, "objective/train/value_std": 0.01110076904296875, "objective/train/weight_avg": 1.0014337301254272, "objective/train/weighted_lm_loss": 1.4280571937561035, "objective/train/weights_max": 1.1778565645217896, "objective/train/weights_min": 0.3711100220680237, "theoretical_loss": 3.663444399248678, "tokens_seen": 1042022400 }, { "epoch": 0.32, "learning_rate": 0.000691141068849302, "loss": 0.0719, "theoretical_loss": 3.663444399248678, "tokens_seen": 1042022400 }, { "epoch": 0.32, "learning_rate": 0.0006910608249077195, "loss": 0.0754, "theoretical_loss": 3.6633580477042633, "tokens_seen": 1042284544 }, { "epoch": 0.32, "learning_rate": 0.0006909805809661371, "loss": 0.0733, "theoretical_loss": 3.663271723954665, "tokens_seen": 1042546688 }, { "epoch": 0.32, "learning_rate": 0.0006909003370245547, "loss": 0.0739, "theoretical_loss": 3.6631854279839513, "tokens_seen": 1042808832 }, { "epoch": 0.32, "learning_rate": 0.0006908200930829722, "loss": 0.0703, "theoretical_loss": 3.6630991597762024, "tokens_seen": 1043070976 }, { "epoch": 0.32, "learning_rate": 0.0006907398491413899, "loss": 0.0742, "theoretical_loss": 3.6630129193155128, "tokens_seen": 1043333120 }, { "epoch": 0.32, "learning_rate": 0.0006906596051998074, "loss": 0.0733, "theoretical_loss": 3.6629267065859894, "tokens_seen": 1043595264 }, { "epoch": 0.32, "learning_rate": 0.000690579361258225, "loss": 0.0755, "theoretical_loss": 3.662840521571753, "tokens_seen": 1043857408 }, { "epoch": 0.32, "learning_rate": 0.0006904991173166426, "loss": 0.074, "theoretical_loss": 3.662754364256937, "tokens_seen": 1044119552 }, { "epoch": 0.32, "learning_rate": 0.0006904188733750601, "loss": 0.0773, "theoretical_loss": 3.662668234625688, "tokens_seen": 1044381696 }, { "epoch": 0.32, "learning_rate": 0.0006903386294334778, "loss": 0.0719, "theoretical_loss": 3.6625821326621653, "tokens_seen": 1044643840 }, { "epoch": 0.32, "learning_rate": 0.0006902583854918954, "loss": 0.0761, "theoretical_loss": 3.6624960583505404, "tokens_seen": 1044905984 }, { "epoch": 0.32, "learning_rate": 0.000690178141550313, "loss": 0.0717, "theoretical_loss": 3.662410011675001, "tokens_seen": 1045168128 }, { "epoch": 0.32, "learning_rate": 0.0006900978976087306, "loss": 0.0748, "theoretical_loss": 3.6623239926197444, "tokens_seen": 1045430272 }, { "epoch": 0.32, "learning_rate": 0.0006900176536671482, "loss": 0.0741, "theoretical_loss": 3.6622380011689826, "tokens_seen": 1045692416 }, { "epoch": 0.32, "learning_rate": 0.0006899374097255657, "loss": 0.0735, "theoretical_loss": 3.66215203730694, "tokens_seen": 1045954560 }, { "epoch": 0.32, "learning_rate": 0.0006898571657839833, "loss": 0.0728, "theoretical_loss": 3.6620661010178543, "tokens_seen": 1046216704 }, { "epoch": 0.32, "learning_rate": 0.0006897769218424009, "loss": 0.0737, "theoretical_loss": 3.6619801922859763, "tokens_seen": 1046478848 }, { "epoch": 0.32, "learning_rate": 0.0006896966779008184, "loss": 0.0726, "theoretical_loss": 3.661894311095568, "tokens_seen": 1046740992 }, { "epoch": 0.32, "learning_rate": 0.0006896164339592362, "loss": 0.0751, "theoretical_loss": 3.6618084574309075, "tokens_seen": 1047003136 }, { "epoch": 0.32, "learning_rate": 0.0006895361900176537, "loss": 0.0726, "theoretical_loss": 3.6617226312762834, "tokens_seen": 1047265280 }, { "epoch": 0.32, "learning_rate": 0.0006894559460760713, "loss": 0.0724, "theoretical_loss": 3.6616368326159976, "tokens_seen": 1047527424 }, { "epoch": 0.32, "learning_rate": 0.0006893757021344889, "loss": 0.0747, "theoretical_loss": 3.6615510614343654, "tokens_seen": 1047789568 }, { "epoch": 0.32, "learning_rate": 0.0006892954581929064, "loss": 0.0713, "theoretical_loss": 3.661465317715715, "tokens_seen": 1048051712 }, { "epoch": 0.32, "learning_rate": 0.000689215214251324, "loss": 0.0729, "theoretical_loss": 3.6613796014443865, "tokens_seen": 1048313856 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.000845851085614413, "objective/train/docs_used": 384505, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4578531980514526, "objective/train/original_loss": 1.457853078842163, "objective/train/theoretical_loss": 3.661293912604734, "objective/train/tokens_used": 1069036000, "objective/train/value_avg": -0.00762176513671875, "objective/train/value_loss": 0.0002575255639385432, "objective/train/value_max": -6.812810897827148e-05, "objective/train/value_min": -0.26220703125, "objective/train/value_reward_corr": 0.5918434184109678, "objective/train/value_std": 0.01096343994140625, "objective/train/weight_avg": 1.0009592771530151, "objective/train/weighted_lm_loss": 1.459302544593811, "objective/train/weights_max": 1.1559641361236572, "objective/train/weights_min": 0.368507981300354, "theoretical_loss": 3.661293912604734, "tokens_seen": 1048576000 }, { "epoch": 0.32, "learning_rate": 0.0006891349703097416, "loss": 0.0725, "theoretical_loss": 3.661293912604734, "tokens_seen": 1048576000 }, { "epoch": 0.32, "learning_rate": 0.0006890547263681592, "loss": 0.0737, "theoretical_loss": 3.661208251181124, "tokens_seen": 1048838144 }, { "epoch": 0.32, "learning_rate": 0.0006889744824265767, "loss": 0.072, "theoretical_loss": 3.6611226171579356, "tokens_seen": 1049100288 }, { "epoch": 0.32, "learning_rate": 0.0006888942384849945, "loss": 0.0739, "theoretical_loss": 3.6610370105195607, "tokens_seen": 1049362432 }, { "epoch": 0.32, "learning_rate": 0.000688813994543412, "loss": 0.0753, "theoretical_loss": 3.660951431250405, "tokens_seen": 1049624576 }, { "epoch": 0.32, "learning_rate": 0.0006887337506018296, "loss": 0.071, "theoretical_loss": 3.6608658793348847, "tokens_seen": 1049886720 }, { "epoch": 0.32, "learning_rate": 0.0006886535066602472, "loss": 0.074, "theoretical_loss": 3.6607803547574314, "tokens_seen": 1050148864 }, { "epoch": 0.32, "learning_rate": 0.0006885732627186647, "loss": 0.0754, "theoretical_loss": 3.660694857502487, "tokens_seen": 1050411008 }, { "epoch": 0.32, "learning_rate": 0.0006884930187770824, "loss": 0.0729, "theoretical_loss": 3.660609387554509, "tokens_seen": 1050673152 }, { "epoch": 0.32, "learning_rate": 0.0006884127748354999, "loss": 0.0761, "theoretical_loss": 3.6605239448979647, "tokens_seen": 1050935296 }, { "epoch": 0.32, "learning_rate": 0.0006883325308939175, "loss": 0.0751, "theoretical_loss": 3.660438529517336, "tokens_seen": 1051197440 }, { "epoch": 0.32, "learning_rate": 0.0006882522869523351, "loss": 0.076, "theoretical_loss": 3.660353141397116, "tokens_seen": 1051459584 }, { "epoch": 0.32, "learning_rate": 0.0006881720430107528, "loss": 0.0765, "theoretical_loss": 3.6602677805218127, "tokens_seen": 1051721728 }, { "epoch": 0.32, "learning_rate": 0.0006880917990691703, "loss": 0.0754, "theoretical_loss": 3.660182446875944, "tokens_seen": 1051983872 }, { "epoch": 0.32, "learning_rate": 0.0006880115551275879, "loss": 0.0699, "theoretical_loss": 3.6600971404440434, "tokens_seen": 1052246016 }, { "epoch": 0.32, "learning_rate": 0.0006879313111860055, "loss": 0.0713, "theoretical_loss": 3.660011861210654, "tokens_seen": 1052508160 }, { "epoch": 0.32, "learning_rate": 0.000687851067244423, "loss": 0.0712, "theoretical_loss": 3.659926609160334, "tokens_seen": 1052770304 }, { "epoch": 0.32, "learning_rate": 0.0006877708233028407, "loss": 0.0701, "theoretical_loss": 3.6598413842776534, "tokens_seen": 1053032448 }, { "epoch": 0.32, "learning_rate": 0.0006876905793612582, "loss": 0.0729, "theoretical_loss": 3.6597561865471935, "tokens_seen": 1053294592 }, { "epoch": 0.32, "learning_rate": 0.0006876103354196759, "loss": 0.0774, "theoretical_loss": 3.6596710159535504, "tokens_seen": 1053556736 }, { "epoch": 0.32, "learning_rate": 0.0006875300914780934, "loss": 0.0753, "theoretical_loss": 3.659585872481331, "tokens_seen": 1053818880 }, { "epoch": 0.32, "learning_rate": 0.0006874498475365109, "loss": 0.0752, "theoretical_loss": 3.659500756115156, "tokens_seen": 1054081024 }, { "epoch": 0.32, "learning_rate": 0.0006873696035949287, "loss": 0.0736, "theoretical_loss": 3.659415666839658, "tokens_seen": 1054343168 }, { "epoch": 0.32, "learning_rate": 0.0006872893596533462, "loss": 0.0724, "theoretical_loss": 3.6593306046394813, "tokens_seen": 1054605312 }, { "epoch": 0.32, "learning_rate": 0.0006872091157117638, "loss": 0.075, "theoretical_loss": 3.6592455694992854, "tokens_seen": 1054867456 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.001419214648194611, "objective/train/docs_used": 386814, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3335970640182495, "objective/train/original_loss": 1.33359694480896, "objective/train/theoretical_loss": 3.659160561403739, "objective/train/tokens_used": 1075589600, "objective/train/value_avg": -0.007778167724609375, "objective/train/value_loss": 0.00025554379681125283, "objective/train/value_max": -0.00013875961303710938, "objective/train/value_min": -0.865234375, "objective/train/value_reward_corr": 0.7009334463254417, "objective/train/value_std": 0.0123443603515625, "objective/train/weight_avg": 1.0015344619750977, "objective/train/weighted_lm_loss": 1.3353464603424072, "objective/train/weights_max": 1.3698445558547974, "objective/train/weights_min": 0.3807870149612427, "theoretical_loss": 3.659160561403739, "tokens_seen": 1055129600 }, { "epoch": 0.32, "learning_rate": 0.0006871288717701814, "loss": 0.07, "theoretical_loss": 3.659160561403739, "tokens_seen": 1055129600 }, { "epoch": 0.32, "learning_rate": 0.000687048627828599, "loss": 0.0718, "theoretical_loss": 3.6590755803375252, "tokens_seen": 1055391744 }, { "epoch": 0.32, "learning_rate": 0.0006869683838870165, "loss": 0.0735, "theoretical_loss": 3.65899062628534, "tokens_seen": 1055653888 }, { "epoch": 0.32, "learning_rate": 0.0006868881399454341, "loss": 0.073, "theoretical_loss": 3.65890569923189, "tokens_seen": 1055916032 }, { "epoch": 0.32, "learning_rate": 0.0006868078960038517, "loss": 0.0743, "theoretical_loss": 3.658820799161896, "tokens_seen": 1056178176 }, { "epoch": 0.32, "learning_rate": 0.0006867276520622692, "loss": 0.0749, "theoretical_loss": 3.65873592606009, "tokens_seen": 1056440320 }, { "epoch": 0.32, "learning_rate": 0.000686647408120687, "loss": 0.0707, "theoretical_loss": 3.658651079911218, "tokens_seen": 1056702464 }, { "epoch": 0.32, "learning_rate": 0.0006865671641791045, "loss": 0.0735, "theoretical_loss": 3.658566260700036, "tokens_seen": 1056964608 }, { "epoch": 0.32, "learning_rate": 0.0006864869202375221, "loss": 0.0735, "theoretical_loss": 3.658481468411315, "tokens_seen": 1057226752 }, { "epoch": 0.32, "learning_rate": 0.0006864066762959397, "loss": 0.0715, "theoretical_loss": 3.6583967030298368, "tokens_seen": 1057488896 }, { "epoch": 0.32, "learning_rate": 0.0006863264323543572, "loss": 0.0746, "theoretical_loss": 3.6583119645403954, "tokens_seen": 1057751040 }, { "epoch": 0.32, "learning_rate": 0.0006862461884127749, "loss": 0.0709, "theoretical_loss": 3.658227252927799, "tokens_seen": 1058013184 }, { "epoch": 0.32, "learning_rate": 0.0006861659444711924, "loss": 0.0704, "theoretical_loss": 3.6581425681768653, "tokens_seen": 1058275328 }, { "epoch": 0.32, "learning_rate": 0.00068608570052961, "loss": 0.072, "theoretical_loss": 3.6580579102724267, "tokens_seen": 1058537472 }, { "epoch": 0.32, "learning_rate": 0.0006860054565880276, "loss": 0.0734, "theoretical_loss": 3.657973279199327, "tokens_seen": 1058799616 }, { "epoch": 0.32, "learning_rate": 0.0006859252126464453, "loss": 0.0734, "theoretical_loss": 3.6578886749424226, "tokens_seen": 1059061760 }, { "epoch": 0.32, "learning_rate": 0.0006858449687048628, "loss": 0.077, "theoretical_loss": 3.657804097486581, "tokens_seen": 1059323904 }, { "epoch": 0.32, "learning_rate": 0.0006857647247632804, "loss": 0.0705, "theoretical_loss": 3.657719546816685, "tokens_seen": 1059586048 }, { "epoch": 0.32, "learning_rate": 0.000685684480821698, "loss": 0.0709, "theoretical_loss": 3.657635022917626, "tokens_seen": 1059848192 }, { "epoch": 0.32, "learning_rate": 0.0006856042368801155, "loss": 0.0715, "theoretical_loss": 3.65755052577431, "tokens_seen": 1060110336 }, { "epoch": 0.32, "learning_rate": 0.0006855239929385332, "loss": 0.071, "theoretical_loss": 3.657466055371654, "tokens_seen": 1060372480 }, { "epoch": 0.32, "learning_rate": 0.0006854437489969507, "loss": 0.0721, "theoretical_loss": 3.657381611694588, "tokens_seen": 1060634624 }, { "epoch": 0.32, "learning_rate": 0.0006853635050553683, "loss": 0.0731, "theoretical_loss": 3.6572971947280544, "tokens_seen": 1060896768 }, { "epoch": 0.32, "learning_rate": 0.0006852832611137859, "loss": 0.0764, "theoretical_loss": 3.6572128044570067, "tokens_seen": 1061158912 }, { "epoch": 0.32, "learning_rate": 0.0006852030171722034, "loss": 0.0724, "theoretical_loss": 3.657128440866412, "tokens_seen": 1061421056 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.0005861930549144745, "objective/train/docs_used": 389244, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5149002075195312, "objective/train/original_loss": 1.5149000883102417, "objective/train/theoretical_loss": 3.6570441039412485, "objective/train/tokens_used": 1082143200, "objective/train/value_avg": -0.00844573974609375, "objective/train/value_loss": 0.00028737890534102917, "objective/train/value_max": -9.918212890625e-05, "objective/train/value_min": -0.37060546875, "objective/train/value_reward_corr": 0.6168363103967418, "objective/train/value_std": 0.01305389404296875, "objective/train/weight_avg": 1.0007121562957764, "objective/train/weighted_lm_loss": 1.5156128406524658, "objective/train/weights_max": 1.3453842401504517, "objective/train/weights_min": 0.36841732263565063, "theoretical_loss": 3.6570441039412485, "tokens_seen": 1061683200 }, { "epoch": 0.32, "learning_rate": 0.0006851227732306211, "loss": 0.073, "theoretical_loss": 3.6570441039412485, "tokens_seen": 1061683200 }, { "epoch": 0.32, "learning_rate": 0.0006850425292890387, "loss": 0.0734, "theoretical_loss": 3.6569597936665064, "tokens_seen": 1061945344 }, { "epoch": 0.32, "learning_rate": 0.0006849622853474563, "loss": 0.0753, "theoretical_loss": 3.6568755100271897, "tokens_seen": 1062207488 }, { "epoch": 0.32, "learning_rate": 0.0006848820414058739, "loss": 0.0727, "theoretical_loss": 3.656791253008313, "tokens_seen": 1062469632 }, { "epoch": 0.32, "learning_rate": 0.0006848017974642915, "loss": 0.0738, "theoretical_loss": 3.6567070225949028, "tokens_seen": 1062731776 }, { "epoch": 0.32, "learning_rate": 0.000684721553522709, "loss": 0.0727, "theoretical_loss": 3.656622818771999, "tokens_seen": 1062993920 }, { "epoch": 0.32, "learning_rate": 0.0006846413095811267, "loss": 0.0711, "theoretical_loss": 3.6565386415246524, "tokens_seen": 1063256064 }, { "epoch": 0.32, "learning_rate": 0.0006845610656395442, "loss": 0.0719, "theoretical_loss": 3.6564544908379273, "tokens_seen": 1063518208 }, { "epoch": 0.32, "learning_rate": 0.0006844808216979617, "loss": 0.0723, "theoretical_loss": 3.6563703666968985, "tokens_seen": 1063780352 }, { "epoch": 0.32, "learning_rate": 0.0006844005777563795, "loss": 0.0708, "theoretical_loss": 3.656286269086653, "tokens_seen": 1064042496 }, { "epoch": 0.32, "learning_rate": 0.000684320333814797, "loss": 0.0734, "theoretical_loss": 3.6562021979922923, "tokens_seen": 1064304640 }, { "epoch": 0.32, "learning_rate": 0.0006842400898732146, "loss": 0.0752, "theoretical_loss": 3.6561181533989267, "tokens_seen": 1064566784 }, { "epoch": 0.32, "learning_rate": 0.0006841598459316322, "loss": 0.0731, "theoretical_loss": 3.6560341352916796, "tokens_seen": 1064828928 }, { "epoch": 0.32, "learning_rate": 0.0006840796019900498, "loss": 0.0729, "theoretical_loss": 3.655950143655688, "tokens_seen": 1065091072 }, { "epoch": 0.32, "learning_rate": 0.0006839993580484673, "loss": 0.0733, "theoretical_loss": 3.655866178476098, "tokens_seen": 1065353216 }, { "epoch": 0.32, "learning_rate": 0.0006839191141068849, "loss": 0.0729, "theoretical_loss": 3.65578223973807, "tokens_seen": 1065615360 }, { "epoch": 0.32, "learning_rate": 0.0006838388701653025, "loss": 0.077, "theoretical_loss": 3.6556983274267765, "tokens_seen": 1065877504 }, { "epoch": 0.32, "learning_rate": 0.0006837586262237201, "loss": 0.0728, "theoretical_loss": 3.6556144415273994, "tokens_seen": 1066139648 }, { "epoch": 0.32, "learning_rate": 0.0006836783822821378, "loss": 0.0735, "theoretical_loss": 3.655530582025136, "tokens_seen": 1066401792 }, { "epoch": 0.32, "learning_rate": 0.0006835981383405553, "loss": 0.072, "theoretical_loss": 3.6554467489051925, "tokens_seen": 1066663936 }, { "epoch": 0.32, "learning_rate": 0.000683517894398973, "loss": 0.0746, "theoretical_loss": 3.6553629421527885, "tokens_seen": 1066926080 }, { "epoch": 0.32, "learning_rate": 0.0006834376504573905, "loss": 0.0744, "theoretical_loss": 3.655279161753156, "tokens_seen": 1067188224 }, { "epoch": 0.32, "learning_rate": 0.000683357406515808, "loss": 0.0713, "theoretical_loss": 3.6551954076915374, "tokens_seen": 1067450368 }, { "epoch": 0.32, "learning_rate": 0.0006832771625742257, "loss": 0.0727, "theoretical_loss": 3.655111679953188, "tokens_seen": 1067712512 }, { "epoch": 0.32, "learning_rate": 0.0006831969186326432, "loss": 0.0737, "theoretical_loss": 3.6550279785233757, "tokens_seen": 1067974656 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.001482620951719582, "objective/train/docs_used": 391774, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.458082914352417, "objective/train/original_loss": 1.458082914352417, "objective/train/theoretical_loss": 3.654944303387378, "objective/train/tokens_used": 1088696800, "objective/train/value_avg": -0.00849151611328125, "objective/train/value_loss": 0.00018974917475134134, "objective/train/value_max": -0.00010472536087036133, "objective/train/value_min": -0.64208984375, "objective/train/value_reward_corr": 0.7253735317584146, "objective/train/value_std": 0.0149993896484375, "objective/train/weight_avg": 1.0015714168548584, "objective/train/weighted_lm_loss": 1.4602124691009521, "objective/train/weights_max": 1.292288899421692, "objective/train/weights_min": 0.36969563364982605, "theoretical_loss": 3.654944303387378, "tokens_seen": 1068236800 }, { "epoch": 0.32, "learning_rate": 0.0006831166746910608, "loss": 0.0736, "theoretical_loss": 3.654944303387378, "tokens_seen": 1068236800 }, { "epoch": 0.32, "learning_rate": 0.0006830364307494784, "loss": 0.073, "theoretical_loss": 3.654860654530486, "tokens_seen": 1068498944 }, { "epoch": 0.32, "learning_rate": 0.0006829561868078961, "loss": 0.0741, "theoretical_loss": 3.6547770319380026, "tokens_seen": 1068761088 }, { "epoch": 0.32, "learning_rate": 0.0006828759428663136, "loss": 0.0712, "theoretical_loss": 3.6546934355952425, "tokens_seen": 1069023232 }, { "epoch": 0.32, "learning_rate": 0.0006827956989247312, "loss": 0.0743, "theoretical_loss": 3.6546098654875303, "tokens_seen": 1069285376 }, { "epoch": 0.32, "learning_rate": 0.0006827154549831488, "loss": 0.0743, "theoretical_loss": 3.654526321600205, "tokens_seen": 1069547520 }, { "epoch": 0.32, "learning_rate": 0.0006826352110415663, "loss": 0.0728, "theoretical_loss": 3.6544428039186165, "tokens_seen": 1069809664 }, { "epoch": 0.32, "learning_rate": 0.000682554967099984, "loss": 0.0715, "theoretical_loss": 3.6543593124281264, "tokens_seen": 1070071808 }, { "epoch": 0.32, "learning_rate": 0.0006824747231584015, "loss": 0.0738, "theoretical_loss": 3.654275847114107, "tokens_seen": 1070333952 }, { "epoch": 0.32, "learning_rate": 0.0006823944792168192, "loss": 0.0761, "theoretical_loss": 3.6541924079619443, "tokens_seen": 1070596096 }, { "epoch": 0.32, "learning_rate": 0.0006823142352752367, "loss": 0.0741, "theoretical_loss": 3.654108994957034, "tokens_seen": 1070858240 }, { "epoch": 0.32, "learning_rate": 0.0006822339913336542, "loss": 0.075, "theoretical_loss": 3.654025608084786, "tokens_seen": 1071120384 }, { "epoch": 0.32, "learning_rate": 0.000682153747392072, "loss": 0.0743, "theoretical_loss": 3.653942247330619, "tokens_seen": 1071382528 }, { "epoch": 0.32, "learning_rate": 0.0006820735034504895, "loss": 0.0725, "theoretical_loss": 3.653858912679966, "tokens_seen": 1071644672 }, { "epoch": 0.32, "learning_rate": 0.0006819932595089071, "loss": 0.0735, "theoretical_loss": 3.6537756041182696, "tokens_seen": 1071906816 }, { "epoch": 0.32, "learning_rate": 0.0006819130155673247, "loss": 0.0738, "theoretical_loss": 3.6536923216309862, "tokens_seen": 1072168960 }, { "epoch": 0.32, "learning_rate": 0.0006818327716257423, "loss": 0.0743, "theoretical_loss": 3.653609065203582, "tokens_seen": 1072431104 }, { "epoch": 0.33, "learning_rate": 0.0006817525276841598, "loss": 0.0724, "theoretical_loss": 3.6535258348215356, "tokens_seen": 1072693248 }, { "epoch": 0.33, "learning_rate": 0.0006816722837425775, "loss": 0.0747, "theoretical_loss": 3.653442630470337, "tokens_seen": 1072955392 }, { "epoch": 0.33, "learning_rate": 0.000681592039800995, "loss": 0.074, "theoretical_loss": 3.653359452135488, "tokens_seen": 1073217536 }, { "epoch": 0.33, "learning_rate": 0.0006815117958594125, "loss": 0.0735, "theoretical_loss": 3.653276299802503, "tokens_seen": 1073479680 }, { "epoch": 0.33, "learning_rate": 0.0006814315519178303, "loss": 0.0736, "theoretical_loss": 3.6531931734569056, "tokens_seen": 1073741824 }, { "epoch": 0.33, "learning_rate": 0.0006813513079762478, "loss": 0.0724, "theoretical_loss": 3.6531100730842336, "tokens_seen": 1074003968 }, { "epoch": 0.33, "learning_rate": 0.0006812710640346655, "loss": 0.0746, "theoretical_loss": 3.653026998670035, "tokens_seen": 1074266112 }, { "epoch": 0.33, "learning_rate": 0.000681190820093083, "loss": 0.0733, "theoretical_loss": 3.652943950199869, "tokens_seen": 1074528256 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.0008113806834444404, "objective/train/docs_used": 394219, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4396977424621582, "objective/train/original_loss": 1.4396977424621582, "objective/train/theoretical_loss": 3.652860927659307, "objective/train/tokens_used": 1095250400, "objective/train/value_avg": -0.007663726806640625, "objective/train/value_loss": 0.00024871714413166046, "objective/train/value_max": -0.00011146068572998047, "objective/train/value_min": -0.373046875, "objective/train/value_reward_corr": 0.6646306086042744, "objective/train/value_std": 0.01288604736328125, "objective/train/weight_avg": 1.0009232759475708, "objective/train/weighted_lm_loss": 1.4412873983383179, "objective/train/weights_max": 1.1857911348342896, "objective/train/weights_min": 0.3686436712741852, "theoretical_loss": 3.652860927659307, "tokens_seen": 1074790400 }, { "epoch": 0.33, "learning_rate": 0.0006811105761515006, "loss": 0.0734, "theoretical_loss": 3.652860927659307, "tokens_seen": 1074790400 }, { "epoch": 0.33, "learning_rate": 0.0006810303322099182, "loss": 0.0727, "theoretical_loss": 3.6527779310339326, "tokens_seen": 1075052544 }, { "epoch": 0.33, "learning_rate": 0.0006809500882683357, "loss": 0.0742, "theoretical_loss": 3.652694960309339, "tokens_seen": 1075314688 }, { "epoch": 0.33, "learning_rate": 0.0006808698443267533, "loss": 0.0751, "theoretical_loss": 3.6526120154711332, "tokens_seen": 1075576832 }, { "epoch": 0.33, "learning_rate": 0.0006807896003851709, "loss": 0.072, "theoretical_loss": 3.6525290965049324, "tokens_seen": 1075838976 }, { "epoch": 0.33, "learning_rate": 0.0006807093564435886, "loss": 0.0738, "theoretical_loss": 3.652446203396365, "tokens_seen": 1076101120 }, { "epoch": 0.33, "learning_rate": 0.0006806291125020061, "loss": 0.0729, "theoretical_loss": 3.6523633361310717, "tokens_seen": 1076363264 }, { "epoch": 0.33, "learning_rate": 0.0006805488685604238, "loss": 0.0737, "theoretical_loss": 3.6522804946947045, "tokens_seen": 1076625408 }, { "epoch": 0.33, "learning_rate": 0.0006804686246188413, "loss": 0.0728, "theoretical_loss": 3.6521976790729265, "tokens_seen": 1076887552 }, { "epoch": 0.33, "learning_rate": 0.0006803883806772588, "loss": 0.0727, "theoretical_loss": 3.652114889251412, "tokens_seen": 1077149696 }, { "epoch": 0.33, "learning_rate": 0.0006803081367356765, "loss": 0.0763, "theoretical_loss": 3.6520321252158485, "tokens_seen": 1077411840 }, { "epoch": 0.33, "learning_rate": 0.000680227892794094, "loss": 0.0724, "theoretical_loss": 3.651949386951933, "tokens_seen": 1077673984 }, { "epoch": 0.33, "learning_rate": 0.0006801476488525116, "loss": 0.0729, "theoretical_loss": 3.6518666744453734, "tokens_seen": 1077936128 }, { "epoch": 0.33, "learning_rate": 0.0006800674049109292, "loss": 0.073, "theoretical_loss": 3.651783987681892, "tokens_seen": 1078198272 }, { "epoch": 0.33, "learning_rate": 0.0006799871609693469, "loss": 0.0734, "theoretical_loss": 3.6517013266472187, "tokens_seen": 1078460416 }, { "epoch": 0.33, "learning_rate": 0.0006799069170277645, "loss": 0.0736, "theoretical_loss": 3.651618691327098, "tokens_seen": 1078722560 }, { "epoch": 0.33, "learning_rate": 0.000679826673086182, "loss": 0.0749, "theoretical_loss": 3.651536081707284, "tokens_seen": 1078984704 }, { "epoch": 0.33, "learning_rate": 0.0006797464291445996, "loss": 0.0758, "theoretical_loss": 3.651453497773543, "tokens_seen": 1079246848 }, { "epoch": 0.33, "learning_rate": 0.0006796661852030172, "loss": 0.0746, "theoretical_loss": 3.6513709395116516, "tokens_seen": 1079508992 }, { "epoch": 0.33, "learning_rate": 0.0006795859412614348, "loss": 0.0714, "theoretical_loss": 3.651288406907399, "tokens_seen": 1079771136 }, { "epoch": 0.33, "learning_rate": 0.0006795056973198523, "loss": 0.0737, "theoretical_loss": 3.6512058999465844, "tokens_seen": 1080033280 }, { "epoch": 0.33, "learning_rate": 0.00067942545337827, "loss": 0.0732, "theoretical_loss": 3.6511234186150197, "tokens_seen": 1080295424 }, { "epoch": 0.33, "learning_rate": 0.0006793452094366875, "loss": 0.0742, "theoretical_loss": 3.6510409628985263, "tokens_seen": 1080557568 }, { "epoch": 0.33, "learning_rate": 0.000679264965495105, "loss": 0.0733, "theoretical_loss": 3.6509585327829392, "tokens_seen": 1080819712 }, { "epoch": 0.33, "learning_rate": 0.0006791847215535228, "loss": 0.073, "theoretical_loss": 3.6508761282541027, "tokens_seen": 1081081856 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.0017559973057359457, "objective/train/docs_used": 396489, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5023612976074219, "objective/train/original_loss": 1.5023612976074219, "objective/train/theoretical_loss": 3.6507937492978733, "objective/train/tokens_used": 1101804000, "objective/train/value_avg": -0.0075531005859375, "objective/train/value_loss": 0.0002941191487479955, "objective/train/value_max": -0.00011962652206420898, "objective/train/value_min": -0.9130859375, "objective/train/value_reward_corr": 0.6127776569660509, "objective/train/value_std": 0.0134735107421875, "objective/train/weight_avg": 1.0018857717514038, "objective/train/weighted_lm_loss": 1.5048304796218872, "objective/train/weights_max": 1.7268811464309692, "objective/train/weights_min": 0.3711496591567993, "theoretical_loss": 3.6507937492978733, "tokens_seen": 1081344000 }, { "epoch": 0.33, "learning_rate": 0.0006791044776119403, "loss": 0.0738, "theoretical_loss": 3.6507937492978733, "tokens_seen": 1081344000 }, { "epoch": 0.33, "learning_rate": 0.0006790242336703579, "loss": 0.0712, "theoretical_loss": 3.6507113959001183, "tokens_seen": 1081606144 }, { "epoch": 0.33, "learning_rate": 0.0006789439897287755, "loss": 0.077, "theoretical_loss": 3.6506290680467166, "tokens_seen": 1081868288 }, { "epoch": 0.33, "learning_rate": 0.0006788637457871931, "loss": 0.0739, "theoretical_loss": 3.650546765723558, "tokens_seen": 1082130432 }, { "epoch": 0.33, "learning_rate": 0.0006787835018456106, "loss": 0.0731, "theoretical_loss": 3.650464488916544, "tokens_seen": 1082392576 }, { "epoch": 0.33, "learning_rate": 0.0006787032579040283, "loss": 0.0746, "theoretical_loss": 3.650382237611587, "tokens_seen": 1082654720 }, { "epoch": 0.33, "learning_rate": 0.0006786230139624458, "loss": 0.0763, "theoretical_loss": 3.65030001179461, "tokens_seen": 1082916864 }, { "epoch": 0.33, "learning_rate": 0.0006785427700208634, "loss": 0.0735, "theoretical_loss": 3.650217811451548, "tokens_seen": 1083179008 }, { "epoch": 0.33, "learning_rate": 0.0006784625260792811, "loss": 0.0728, "theoretical_loss": 3.650135636568347, "tokens_seen": 1083441152 }, { "epoch": 0.33, "learning_rate": 0.0006783822821376986, "loss": 0.0734, "theoretical_loss": 3.6500534871309642, "tokens_seen": 1083703296 }, { "epoch": 0.33, "learning_rate": 0.0006783020381961163, "loss": 0.0736, "theoretical_loss": 3.649971363125368, "tokens_seen": 1083965440 }, { "epoch": 0.33, "learning_rate": 0.0006782217942545338, "loss": 0.0738, "theoretical_loss": 3.6498892645375367, "tokens_seen": 1084227584 }, { "epoch": 0.33, "learning_rate": 0.0006781415503129514, "loss": 0.0727, "theoretical_loss": 3.649807191353462, "tokens_seen": 1084489728 }, { "epoch": 0.33, "learning_rate": 0.000678061306371369, "loss": 0.0742, "theoretical_loss": 3.6497251435591442, "tokens_seen": 1084751872 }, { "epoch": 0.33, "learning_rate": 0.0006779810624297865, "loss": 0.0777, "theoretical_loss": 3.6496431211405973, "tokens_seen": 1085014016 }, { "epoch": 0.33, "learning_rate": 0.0006779008184882041, "loss": 0.0714, "theoretical_loss": 3.649561124083844, "tokens_seen": 1085276160 }, { "epoch": 0.33, "learning_rate": 0.0006778205745466217, "loss": 0.0752, "theoretical_loss": 3.6494791523749193, "tokens_seen": 1085538304 }, { "epoch": 0.33, "learning_rate": 0.0006777403306050394, "loss": 0.0739, "theoretical_loss": 3.6493972059998696, "tokens_seen": 1085800448 }, { "epoch": 0.33, "learning_rate": 0.0006776600866634569, "loss": 0.0734, "theoretical_loss": 3.649315284944751, "tokens_seen": 1086062592 }, { "epoch": 0.33, "learning_rate": 0.0006775798427218746, "loss": 0.072, "theoretical_loss": 3.649233389195632, "tokens_seen": 1086324736 }, { "epoch": 0.33, "learning_rate": 0.0006774995987802921, "loss": 0.0735, "theoretical_loss": 3.6491515187385914, "tokens_seen": 1086586880 }, { "epoch": 0.33, "learning_rate": 0.0006774193548387097, "loss": 0.071, "theoretical_loss": 3.649069673559719, "tokens_seen": 1086849024 }, { "epoch": 0.33, "learning_rate": 0.0006773391108971273, "loss": 0.0731, "theoretical_loss": 3.648987853645116, "tokens_seen": 1087111168 }, { "epoch": 0.33, "learning_rate": 0.0006772588669555448, "loss": 0.0715, "theoretical_loss": 3.648906058980894, "tokens_seen": 1087373312 }, { "epoch": 0.33, "learning_rate": 0.0006771786230139625, "loss": 0.0735, "theoretical_loss": 3.6488242895531764, "tokens_seen": 1087635456 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.00033141745370812714, "objective/train/docs_used": 398862, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3994327783584595, "objective/train/original_loss": 1.39943265914917, "objective/train/theoretical_loss": 3.6487425453480973, "objective/train/tokens_used": 1108357600, "objective/train/value_avg": -0.01074981689453125, "objective/train/value_loss": 0.0003049577062483877, "objective/train/value_max": -0.0001499652862548828, "objective/train/value_min": -0.64697265625, "objective/train/value_reward_corr": 0.7564707177647516, "objective/train/value_std": 0.0179290771484375, "objective/train/weight_avg": 1.0004724264144897, "objective/train/weighted_lm_loss": 1.4000054597854614, "objective/train/weights_max": 1.461032509803772, "objective/train/weights_min": 0.3686436712741852, "theoretical_loss": 3.6487425453480973, "tokens_seen": 1087897600 }, { "epoch": 0.33, "learning_rate": 0.00067709837907238, "loss": 0.0722, "theoretical_loss": 3.6487425453480973, "tokens_seen": 1087897600 }, { "epoch": 0.33, "learning_rate": 0.0006770181351307977, "loss": 0.0712, "theoretical_loss": 3.648660826351801, "tokens_seen": 1088159744 }, { "epoch": 0.33, "learning_rate": 0.0006769378911892153, "loss": 0.0736, "theoretical_loss": 3.6485791325504437, "tokens_seen": 1088421888 }, { "epoch": 0.33, "learning_rate": 0.0006768576472476328, "loss": 0.0729, "theoretical_loss": 3.648497463930192, "tokens_seen": 1088684032 }, { "epoch": 0.33, "learning_rate": 0.0006767774033060504, "loss": 0.0729, "theoretical_loss": 3.6484158204772235, "tokens_seen": 1088946176 }, { "epoch": 0.33, "learning_rate": 0.000676697159364468, "loss": 0.0749, "theoretical_loss": 3.648334202177727, "tokens_seen": 1089208320 }, { "epoch": 0.33, "learning_rate": 0.0006766169154228856, "loss": 0.0762, "theoretical_loss": 3.648252609017902, "tokens_seen": 1089470464 }, { "epoch": 0.33, "learning_rate": 0.0006765366714813031, "loss": 0.0741, "theoretical_loss": 3.648171040983959, "tokens_seen": 1089732608 }, { "epoch": 0.33, "learning_rate": 0.0006764564275397208, "loss": 0.0762, "theoretical_loss": 3.648089498062119, "tokens_seen": 1089994752 }, { "epoch": 0.33, "learning_rate": 0.0006763761835981383, "loss": 0.0752, "theoretical_loss": 3.648007980238614, "tokens_seen": 1090256896 }, { "epoch": 0.33, "learning_rate": 0.0006762959396565558, "loss": 0.0723, "theoretical_loss": 3.6479264874996877, "tokens_seen": 1090519040 }, { "epoch": 0.33, "learning_rate": 0.0006762156957149736, "loss": 0.0728, "theoretical_loss": 3.6478450198315926, "tokens_seen": 1090781184 }, { "epoch": 0.33, "learning_rate": 0.0006761354517733911, "loss": 0.0753, "theoretical_loss": 3.6477635772205947, "tokens_seen": 1091043328 }, { "epoch": 0.33, "learning_rate": 0.0006760552078318088, "loss": 0.075, "theoretical_loss": 3.647682159652969, "tokens_seen": 1091305472 }, { "epoch": 0.33, "learning_rate": 0.0006759749638902263, "loss": 0.074, "theoretical_loss": 3.647600767115002, "tokens_seen": 1091567616 }, { "epoch": 0.33, "learning_rate": 0.0006758947199486439, "loss": 0.0758, "theoretical_loss": 3.6475193995929907, "tokens_seen": 1091829760 }, { "epoch": 0.33, "learning_rate": 0.0006758144760070615, "loss": 0.0736, "theoretical_loss": 3.6474380570732423, "tokens_seen": 1092091904 }, { "epoch": 0.33, "learning_rate": 0.000675734232065479, "loss": 0.0749, "theoretical_loss": 3.6473567395420767, "tokens_seen": 1092354048 }, { "epoch": 0.33, "learning_rate": 0.0006756539881238966, "loss": 0.0728, "theoretical_loss": 3.647275446985822, "tokens_seen": 1092616192 }, { "epoch": 0.33, "learning_rate": 0.0006755737441823142, "loss": 0.074, "theoretical_loss": 3.64719417939082, "tokens_seen": 1092878336 }, { "epoch": 0.33, "learning_rate": 0.0006754935002407319, "loss": 0.0759, "theoretical_loss": 3.6471129367434205, "tokens_seen": 1093140480 }, { "epoch": 0.33, "learning_rate": 0.0006754132562991494, "loss": 0.0715, "theoretical_loss": 3.647031719029985, "tokens_seen": 1093402624 }, { "epoch": 0.33, "learning_rate": 0.0006753330123575671, "loss": 0.073, "theoretical_loss": 3.646950526236887, "tokens_seen": 1093664768 }, { "epoch": 0.33, "learning_rate": 0.0006752527684159846, "loss": 0.0741, "theoretical_loss": 3.6468693583505085, "tokens_seen": 1093926912 }, { "epoch": 0.33, "learning_rate": 0.0006751725244744022, "loss": 0.0759, "theoretical_loss": 3.646788215357244, "tokens_seen": 1094189056 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.000770938815549016, "objective/train/docs_used": 401268, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4965838193893433, "objective/train/original_loss": 1.4965837001800537, "objective/train/theoretical_loss": 3.646707097243498, "objective/train/tokens_used": 1114911200, "objective/train/value_avg": -0.00963592529296875, "objective/train/value_loss": 0.0003326190635561943, "objective/train/value_max": -0.0001634359359741211, "objective/train/value_min": -0.9462890625, "objective/train/value_reward_corr": 0.7103235067802546, "objective/train/value_std": 0.0166168212890625, "objective/train/weight_avg": 1.0009206533432007, "objective/train/weighted_lm_loss": 1.4972987174987793, "objective/train/weights_max": 1.833900809288025, "objective/train/weights_min": 0.36999329924583435, "theoretical_loss": 3.646707097243498, "tokens_seen": 1094451200 }, { "epoch": 0.33, "learning_rate": 0.0006750922805328198, "loss": 0.0734, "theoretical_loss": 3.646707097243498, "tokens_seen": 1094451200 }, { "epoch": 0.33, "learning_rate": 0.0006750120365912373, "loss": 0.0727, "theoretical_loss": 3.646626003995685, "tokens_seen": 1094713344 }, { "epoch": 0.33, "learning_rate": 0.000674931792649655, "loss": 0.0757, "theoretical_loss": 3.6465449356002315, "tokens_seen": 1094975488 }, { "epoch": 0.33, "learning_rate": 0.0006748515487080725, "loss": 0.0712, "theoretical_loss": 3.646463892043574, "tokens_seen": 1095237632 }, { "epoch": 0.33, "learning_rate": 0.0006747713047664902, "loss": 0.0736, "theoretical_loss": 3.6463828733121586, "tokens_seen": 1095499776 }, { "epoch": 0.33, "learning_rate": 0.0006746910608249078, "loss": 0.0746, "theoretical_loss": 3.6463018793924453, "tokens_seen": 1095761920 }, { "epoch": 0.33, "learning_rate": 0.0006746108168833254, "loss": 0.0712, "theoretical_loss": 3.6462209102709, "tokens_seen": 1096024064 }, { "epoch": 0.33, "learning_rate": 0.0006745305729417429, "loss": 0.0721, "theoretical_loss": 3.6461399659340037, "tokens_seen": 1096286208 }, { "epoch": 0.33, "learning_rate": 0.0006744503290001605, "loss": 0.0738, "theoretical_loss": 3.6460590463682454, "tokens_seen": 1096548352 }, { "epoch": 0.33, "learning_rate": 0.0006743700850585781, "loss": 0.0727, "theoretical_loss": 3.6459781515601244, "tokens_seen": 1096810496 }, { "epoch": 0.33, "learning_rate": 0.0006742898411169956, "loss": 0.0749, "theoretical_loss": 3.6458972814961528, "tokens_seen": 1097072640 }, { "epoch": 0.33, "learning_rate": 0.0006742095971754133, "loss": 0.0723, "theoretical_loss": 3.6458164361628516, "tokens_seen": 1097334784 }, { "epoch": 0.33, "learning_rate": 0.0006741293532338308, "loss": 0.0721, "theoretical_loss": 3.645735615546752, "tokens_seen": 1097596928 }, { "epoch": 0.33, "learning_rate": 0.0006740491092922485, "loss": 0.0751, "theoretical_loss": 3.645654819634397, "tokens_seen": 1097859072 }, { "epoch": 0.33, "learning_rate": 0.0006739688653506661, "loss": 0.0735, "theoretical_loss": 3.6455740484123407, "tokens_seen": 1098121216 }, { "epoch": 0.33, "learning_rate": 0.0006738886214090836, "loss": 0.0713, "theoretical_loss": 3.645493301867145, "tokens_seen": 1098383360 }, { "epoch": 0.33, "learning_rate": 0.0006738083774675012, "loss": 0.0749, "theoretical_loss": 3.6454125799853854, "tokens_seen": 1098645504 }, { "epoch": 0.33, "learning_rate": 0.0006737281335259188, "loss": 0.0735, "theoretical_loss": 3.645331882753645, "tokens_seen": 1098907648 }, { "epoch": 0.33, "learning_rate": 0.0006736478895843364, "loss": 0.0732, "theoretical_loss": 3.6452512101585195, "tokens_seen": 1099169792 }, { "epoch": 0.33, "learning_rate": 0.000673567645642754, "loss": 0.0701, "theoretical_loss": 3.645170562186615, "tokens_seen": 1099431936 }, { "epoch": 0.33, "learning_rate": 0.0006734874017011716, "loss": 0.0742, "theoretical_loss": 3.6450899388245466, "tokens_seen": 1099694080 }, { "epoch": 0.33, "learning_rate": 0.0006734071577595891, "loss": 0.0747, "theoretical_loss": 3.645009340058941, "tokens_seen": 1099956224 }, { "epoch": 0.33, "learning_rate": 0.0006733269138180067, "loss": 0.0734, "theoretical_loss": 3.644928765876436, "tokens_seen": 1100218368 }, { "epoch": 0.33, "learning_rate": 0.0006732466698764244, "loss": 0.0722, "theoretical_loss": 3.644848216263678, "tokens_seen": 1100480512 }, { "epoch": 0.33, "learning_rate": 0.0006731664259348419, "loss": 0.075, "theoretical_loss": 3.6447676912073255, "tokens_seen": 1100742656 }, { "epoch": 0.33, "objective/train/advantage_avg": -0.001446638605557382, "objective/train/docs_used": 403530, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5791977643966675, "objective/train/original_loss": 1.5791977643966675, "objective/train/theoretical_loss": 3.6446871906940466, "objective/train/tokens_used": 1121464800, "objective/train/value_avg": -0.00872802734375, "objective/train/value_loss": 0.00039117602864280343, "objective/train/value_max": -0.00010389089584350586, "objective/train/value_min": -0.5576171875, "objective/train/value_reward_corr": 0.8311069137177848, "objective/train/value_std": 0.01605224609375, "objective/train/weight_avg": 0.998737633228302, "objective/train/weighted_lm_loss": 1.5767664909362793, "objective/train/weights_max": 1.3273746967315674, "objective/train/weights_min": 0.368156373500824, "theoretical_loss": 3.6446871906940466, "tokens_seen": 1101004800 }, { "epoch": 0.33, "learning_rate": 0.0006730861819932596, "loss": 0.0757, "theoretical_loss": 3.6446871906940466, "tokens_seen": 1101004800 }, { "epoch": 0.33, "learning_rate": 0.0006730059380516771, "loss": 0.072, "theoretical_loss": 3.6446067147105197, "tokens_seen": 1101266944 }, { "epoch": 0.33, "learning_rate": 0.0006729256941100947, "loss": 0.0713, "theoretical_loss": 3.644526263243433, "tokens_seen": 1101529088 }, { "epoch": 0.33, "learning_rate": 0.0006728454501685123, "loss": 0.0749, "theoretical_loss": 3.644445836279488, "tokens_seen": 1101791232 }, { "epoch": 0.33, "learning_rate": 0.0006727652062269298, "loss": 0.0697, "theoretical_loss": 3.644365433805393, "tokens_seen": 1102053376 }, { "epoch": 0.33, "learning_rate": 0.0006726849622853474, "loss": 0.0721, "theoretical_loss": 3.6442850558078685, "tokens_seen": 1102315520 }, { "epoch": 0.33, "learning_rate": 0.000672604718343765, "loss": 0.0766, "theoretical_loss": 3.6442047022736452, "tokens_seen": 1102577664 }, { "epoch": 0.33, "learning_rate": 0.0006725244744021827, "loss": 0.0759, "theoretical_loss": 3.644124373189464, "tokens_seen": 1102839808 }, { "epoch": 0.33, "learning_rate": 0.0006724442304606002, "loss": 0.0744, "theoretical_loss": 3.644044068542076, "tokens_seen": 1103101952 }, { "epoch": 0.33, "learning_rate": 0.0006723639865190179, "loss": 0.0728, "theoretical_loss": 3.643963788318242, "tokens_seen": 1103364096 }, { "epoch": 0.33, "learning_rate": 0.0006722837425774354, "loss": 0.0723, "theoretical_loss": 3.6438835325047356, "tokens_seen": 1103626240 }, { "epoch": 0.33, "learning_rate": 0.0006722034986358531, "loss": 0.0729, "theoretical_loss": 3.6438033010883375, "tokens_seen": 1103888384 }, { "epoch": 0.33, "learning_rate": 0.0006721232546942706, "loss": 0.0745, "theoretical_loss": 3.643723094055841, "tokens_seen": 1104150528 }, { "epoch": 0.33, "learning_rate": 0.0006720430107526881, "loss": 0.0767, "theoretical_loss": 3.643642911394048, "tokens_seen": 1104412672 }, { "epoch": 0.33, "learning_rate": 0.0006719627668111058, "loss": 0.0734, "theoretical_loss": 3.643562753089772, "tokens_seen": 1104674816 }, { "epoch": 0.33, "learning_rate": 0.0006718825228695233, "loss": 0.0733, "theoretical_loss": 3.6434826191298364, "tokens_seen": 1104936960 }, { "epoch": 0.33, "learning_rate": 0.000671802278927941, "loss": 0.0726, "theoretical_loss": 3.6434025095010747, "tokens_seen": 1105199104 }, { "epoch": 0.34, "learning_rate": 0.0006717220349863586, "loss": 0.0763, "theoretical_loss": 3.6433224241903304, "tokens_seen": 1105461248 }, { "epoch": 0.34, "learning_rate": 0.0006716417910447762, "loss": 0.0749, "theoretical_loss": 3.643242363184458, "tokens_seen": 1105723392 }, { "epoch": 0.34, "learning_rate": 0.0006715615471031937, "loss": 0.0741, "theoretical_loss": 3.6431623264703212, "tokens_seen": 1105985536 }, { "epoch": 0.34, "learning_rate": 0.0006714813031616113, "loss": 0.0711, "theoretical_loss": 3.6430823140347943, "tokens_seen": 1106247680 }, { "epoch": 0.34, "learning_rate": 0.0006714010592200289, "loss": 0.0726, "theoretical_loss": 3.643002325864763, "tokens_seen": 1106509824 }, { "epoch": 0.34, "learning_rate": 0.0006713208152784464, "loss": 0.0726, "theoretical_loss": 3.6429223619471207, "tokens_seen": 1106771968 }, { "epoch": 0.34, "learning_rate": 0.0006712405713368641, "loss": 0.0749, "theoretical_loss": 3.6428424222687736, "tokens_seen": 1107034112 }, { "epoch": 0.34, "learning_rate": 0.0006711603273952816, "loss": 0.074, "theoretical_loss": 3.642762506816636, "tokens_seen": 1107296256 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0007008896209299564, "objective/train/docs_used": 405946, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.543431043624878, "objective/train/original_loss": 1.5434308052062988, "objective/train/theoretical_loss": 3.642682615577634, "objective/train/tokens_used": 1128018400, "objective/train/value_avg": -0.0092010498046875, "objective/train/value_loss": 0.00045838035293854773, "objective/train/value_max": -7.969141006469727e-05, "objective/train/value_min": -0.953125, "objective/train/value_reward_corr": 0.5853789003053751, "objective/train/value_std": 0.0174102783203125, "objective/train/weight_avg": 1.000909447669983, "objective/train/weighted_lm_loss": 1.5446616411209106, "objective/train/weights_max": 2.1671571731567383, "objective/train/weights_min": 0.3689032196998596, "theoretical_loss": 3.642682615577634, "tokens_seen": 1107558400 }, { "epoch": 0.34, "learning_rate": 0.0006710800834536994, "loss": 0.0717, "theoretical_loss": 3.642682615577634, "tokens_seen": 1107558400 }, { "epoch": 0.34, "learning_rate": 0.0006709998395121169, "loss": 0.0757, "theoretical_loss": 3.6426027485387023, "tokens_seen": 1107820544 }, { "epoch": 0.34, "learning_rate": 0.0006709195955705344, "loss": 0.0733, "theoretical_loss": 3.6425229056867865, "tokens_seen": 1108082688 }, { "epoch": 0.34, "learning_rate": 0.0006708393516289521, "loss": 0.0723, "theoretical_loss": 3.642443087008844, "tokens_seen": 1108344832 }, { "epoch": 0.34, "learning_rate": 0.0006707591076873696, "loss": 0.0716, "theoretical_loss": 3.6423632924918383, "tokens_seen": 1108606976 }, { "epoch": 0.34, "learning_rate": 0.0006706788637457872, "loss": 0.0728, "theoretical_loss": 3.6422835221227468, "tokens_seen": 1108869120 }, { "epoch": 0.34, "learning_rate": 0.0006705986198042048, "loss": 0.0728, "theoretical_loss": 3.6422037758885555, "tokens_seen": 1109131264 }, { "epoch": 0.34, "learning_rate": 0.0006705183758626224, "loss": 0.0743, "theoretical_loss": 3.6421240537762607, "tokens_seen": 1109393408 }, { "epoch": 0.34, "learning_rate": 0.0006704381319210399, "loss": 0.0772, "theoretical_loss": 3.6420443557728674, "tokens_seen": 1109655552 }, { "epoch": 0.34, "learning_rate": 0.0006703578879794575, "loss": 0.0741, "theoretical_loss": 3.6419646818653932, "tokens_seen": 1109917696 }, { "epoch": 0.34, "learning_rate": 0.0006702776440378752, "loss": 0.076, "theoretical_loss": 3.641885032040864, "tokens_seen": 1110179840 }, { "epoch": 0.34, "learning_rate": 0.0006701974000962927, "loss": 0.0751, "theoretical_loss": 3.6418054062863163, "tokens_seen": 1110441984 }, { "epoch": 0.34, "learning_rate": 0.0006701171561547104, "loss": 0.073, "theoretical_loss": 3.6417258045887966, "tokens_seen": 1110704128 }, { "epoch": 0.34, "learning_rate": 0.0006700369122131279, "loss": 0.0744, "theoretical_loss": 3.641646226935361, "tokens_seen": 1110966272 }, { "epoch": 0.34, "learning_rate": 0.0006699566682715455, "loss": 0.0759, "theoretical_loss": 3.641566673313076, "tokens_seen": 1111228416 }, { "epoch": 0.34, "learning_rate": 0.0006698764243299631, "loss": 0.0776, "theoretical_loss": 3.6414871437090186, "tokens_seen": 1111490560 }, { "epoch": 0.34, "learning_rate": 0.0006697961803883806, "loss": 0.0752, "theoretical_loss": 3.641407638110275, "tokens_seen": 1111752704 }, { "epoch": 0.34, "learning_rate": 0.0006697159364467983, "loss": 0.0753, "theoretical_loss": 3.641328156503942, "tokens_seen": 1112014848 }, { "epoch": 0.34, "learning_rate": 0.0006696356925052158, "loss": 0.0757, "theoretical_loss": 3.6412486988771255, "tokens_seen": 1112276992 }, { "epoch": 0.34, "learning_rate": 0.0006695554485636335, "loss": 0.0727, "theoretical_loss": 3.6411692652169423, "tokens_seen": 1112539136 }, { "epoch": 0.34, "learning_rate": 0.0006694752046220511, "loss": 0.0749, "theoretical_loss": 3.641089855510518, "tokens_seen": 1112801280 }, { "epoch": 0.34, "learning_rate": 0.0006693949606804687, "loss": 0.0726, "theoretical_loss": 3.64101046974499, "tokens_seen": 1113063424 }, { "epoch": 0.34, "learning_rate": 0.0006693147167388862, "loss": 0.074, "theoretical_loss": 3.640931107907504, "tokens_seen": 1113325568 }, { "epoch": 0.34, "learning_rate": 0.0006692344727973039, "loss": 0.0767, "theoretical_loss": 3.6408517699852165, "tokens_seen": 1113587712 }, { "epoch": 0.34, "learning_rate": 0.0006691542288557214, "loss": 0.0779, "theoretical_loss": 3.640772455965293, "tokens_seen": 1113849856 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0008538411930203438, "objective/train/docs_used": 408181, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3940370082855225, "objective/train/original_loss": 1.3940370082855225, "objective/train/theoretical_loss": 3.64069316583491, "objective/train/tokens_used": 1134572000, "objective/train/value_avg": -0.0080718994140625, "objective/train/value_loss": 0.0002601642336230725, "objective/train/value_max": -8.684396743774414e-05, "objective/train/value_min": -0.57763671875, "objective/train/value_reward_corr": 0.6369542776258966, "objective/train/value_std": 0.01348876953125, "objective/train/weight_avg": 1.0009734630584717, "objective/train/weighted_lm_loss": 1.3951406478881836, "objective/train/weights_max": 1.348272681236267, "objective/train/weights_min": 0.36883002519607544, "theoretical_loss": 3.64069316583491, "tokens_seen": 1114112000 }, { "epoch": 0.34, "learning_rate": 0.0006690739849141389, "loss": 0.0725, "theoretical_loss": 3.64069316583491, "tokens_seen": 1114112000 }, { "epoch": 0.34, "learning_rate": 0.0006689937409725566, "loss": 0.075, "theoretical_loss": 3.640613899581253, "tokens_seen": 1114374144 }, { "epoch": 0.34, "learning_rate": 0.0006689134970309741, "loss": 0.0775, "theoretical_loss": 3.6405346571915187, "tokens_seen": 1114636288 }, { "epoch": 0.34, "learning_rate": 0.0006688332530893918, "loss": 0.0755, "theoretical_loss": 3.6404554386529115, "tokens_seen": 1114898432 }, { "epoch": 0.34, "learning_rate": 0.0006687530091478094, "loss": 0.0721, "theoretical_loss": 3.640376243952648, "tokens_seen": 1115160576 }, { "epoch": 0.34, "learning_rate": 0.000668672765206227, "loss": 0.0721, "theoretical_loss": 3.640297073077953, "tokens_seen": 1115422720 }, { "epoch": 0.34, "learning_rate": 0.0006685925212646445, "loss": 0.0777, "theoretical_loss": 3.640217926016061, "tokens_seen": 1115684864 }, { "epoch": 0.34, "learning_rate": 0.0006685122773230621, "loss": 0.0741, "theoretical_loss": 3.6401388027542185, "tokens_seen": 1115947008 }, { "epoch": 0.34, "learning_rate": 0.0006684320333814797, "loss": 0.0707, "theoretical_loss": 3.6400597032796798, "tokens_seen": 1116209152 }, { "epoch": 0.34, "learning_rate": 0.0006683517894398973, "loss": 0.0752, "theoretical_loss": 3.6399806275797095, "tokens_seen": 1116471296 }, { "epoch": 0.34, "learning_rate": 0.0006682715454983149, "loss": 0.0761, "theoretical_loss": 3.639901575641582, "tokens_seen": 1116733440 }, { "epoch": 0.34, "learning_rate": 0.0006681913015567324, "loss": 0.0739, "theoretical_loss": 3.6398225474525816, "tokens_seen": 1116995584 }, { "epoch": 0.34, "learning_rate": 0.0006681110576151502, "loss": 0.0725, "theoretical_loss": 3.639743543000003, "tokens_seen": 1117257728 }, { "epoch": 0.34, "learning_rate": 0.0006680308136735677, "loss": 0.0732, "theoretical_loss": 3.6396645622711494, "tokens_seen": 1117519872 }, { "epoch": 0.34, "learning_rate": 0.0006679505697319852, "loss": 0.0764, "theoretical_loss": 3.639585605253335, "tokens_seen": 1117782016 }, { "epoch": 0.34, "learning_rate": 0.0006678703257904029, "loss": 0.0736, "theoretical_loss": 3.639506671933882, "tokens_seen": 1118044160 }, { "epoch": 0.34, "learning_rate": 0.0006677900818488204, "loss": 0.0739, "theoretical_loss": 3.639427762300125, "tokens_seen": 1118306304 }, { "epoch": 0.34, "learning_rate": 0.000667709837907238, "loss": 0.0747, "theoretical_loss": 3.6393488763394064, "tokens_seen": 1118568448 }, { "epoch": 0.34, "learning_rate": 0.0006676295939656556, "loss": 0.0737, "theoretical_loss": 3.639270014039078, "tokens_seen": 1118830592 }, { "epoch": 0.34, "learning_rate": 0.0006675493500240732, "loss": 0.0739, "theoretical_loss": 3.6391911753865034, "tokens_seen": 1119092736 }, { "epoch": 0.34, "learning_rate": 0.0006674691060824907, "loss": 0.0753, "theoretical_loss": 3.639112360369054, "tokens_seen": 1119354880 }, { "epoch": 0.34, "learning_rate": 0.0006673888621409083, "loss": 0.0764, "theoretical_loss": 3.6390335689741113, "tokens_seen": 1119617024 }, { "epoch": 0.34, "learning_rate": 0.000667308618199326, "loss": 0.0751, "theoretical_loss": 3.638954801189067, "tokens_seen": 1119879168 }, { "epoch": 0.34, "learning_rate": 0.0006672283742577436, "loss": 0.0758, "theoretical_loss": 3.6388760570013226, "tokens_seen": 1120141312 }, { "epoch": 0.34, "learning_rate": 0.0006671481303161612, "loss": 0.0729, "theoretical_loss": 3.6387973363982877, "tokens_seen": 1120403456 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.00035512945032678545, "objective/train/docs_used": 410712, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4525707960128784, "objective/train/original_loss": 1.4525706768035889, "objective/train/theoretical_loss": 3.638718639367384, "objective/train/tokens_used": 1141125600, "objective/train/value_avg": -0.00772857666015625, "objective/train/value_loss": 0.00021194637520238757, "objective/train/value_max": -5.3882598876953125e-05, "objective/train/value_min": -0.6298828125, "objective/train/value_reward_corr": 0.7386635939472923, "objective/train/value_std": 0.014801025390625, "objective/train/weight_avg": 1.0004568099975586, "objective/train/weighted_lm_loss": 1.453499674797058, "objective/train/weights_max": 1.5431358814239502, "objective/train/weights_min": 0.541466236114502, "theoretical_loss": 3.638718639367384, "tokens_seen": 1120665600 }, { "epoch": 0.34, "learning_rate": 0.0006670678863745787, "loss": 0.0727, "theoretical_loss": 3.638718639367384, "tokens_seen": 1120665600 }, { "epoch": 0.34, "learning_rate": 0.0006669876424329964, "loss": 0.0738, "theoretical_loss": 3.638639965896041, "tokens_seen": 1120927744 }, { "epoch": 0.34, "learning_rate": 0.0006669073984914139, "loss": 0.0765, "theoretical_loss": 3.638561315971698, "tokens_seen": 1121189888 }, { "epoch": 0.34, "learning_rate": 0.0006668271545498314, "loss": 0.0732, "theoretical_loss": 3.638482689581805, "tokens_seen": 1121452032 }, { "epoch": 0.34, "learning_rate": 0.0006667469106082491, "loss": 0.0724, "theoretical_loss": 3.6384040867138214, "tokens_seen": 1121714176 }, { "epoch": 0.34, "learning_rate": 0.0006666666666666666, "loss": 0.0765, "theoretical_loss": 3.6383255073552148, "tokens_seen": 1121976320 }, { "epoch": 0.34, "learning_rate": 0.0006665864227250843, "loss": 0.0752, "theoretical_loss": 3.638246951493463, "tokens_seen": 1122238464 }, { "epoch": 0.34, "learning_rate": 0.0006665061787835019, "loss": 0.0739, "theoretical_loss": 3.6381684191160555, "tokens_seen": 1122500608 }, { "epoch": 0.34, "learning_rate": 0.0006664259348419195, "loss": 0.0725, "theoretical_loss": 3.638089910210488, "tokens_seen": 1122762752 }, { "epoch": 0.34, "learning_rate": 0.000666345690900337, "loss": 0.0762, "theoretical_loss": 3.638011424764269, "tokens_seen": 1123024896 }, { "epoch": 0.34, "learning_rate": 0.0006662654469587546, "loss": 0.0711, "theoretical_loss": 3.6379329627649137, "tokens_seen": 1123287040 }, { "epoch": 0.34, "learning_rate": 0.0006661852030171722, "loss": 0.0732, "theoretical_loss": 3.6378545241999487, "tokens_seen": 1123549184 }, { "epoch": 0.34, "learning_rate": 0.0006661049590755897, "loss": 0.0746, "theoretical_loss": 3.637776109056909, "tokens_seen": 1123811328 }, { "epoch": 0.34, "learning_rate": 0.0006660247151340074, "loss": 0.0737, "theoretical_loss": 3.6376977173233405, "tokens_seen": 1124073472 }, { "epoch": 0.34, "learning_rate": 0.0006659444711924249, "loss": 0.0729, "theoretical_loss": 3.6376193489867976, "tokens_seen": 1124335616 }, { "epoch": 0.34, "learning_rate": 0.0006658642272508427, "loss": 0.0754, "theoretical_loss": 3.6375410040348446, "tokens_seen": 1124597760 }, { "epoch": 0.34, "learning_rate": 0.0006657839833092602, "loss": 0.071, "theoretical_loss": 3.637462682455055, "tokens_seen": 1124859904 }, { "epoch": 0.34, "learning_rate": 0.0006657037393676778, "loss": 0.0737, "theoretical_loss": 3.6373843842350118, "tokens_seen": 1125122048 }, { "epoch": 0.34, "learning_rate": 0.0006656234954260954, "loss": 0.0752, "theoretical_loss": 3.637306109362308, "tokens_seen": 1125384192 }, { "epoch": 0.34, "learning_rate": 0.0006655432514845129, "loss": 0.0735, "theoretical_loss": 3.6372278578245454, "tokens_seen": 1125646336 }, { "epoch": 0.34, "learning_rate": 0.0006654630075429305, "loss": 0.0732, "theoretical_loss": 3.6371496296093357, "tokens_seen": 1125908480 }, { "epoch": 0.34, "learning_rate": 0.0006653827636013481, "loss": 0.0746, "theoretical_loss": 3.6370714247043003, "tokens_seen": 1126170624 }, { "epoch": 0.34, "learning_rate": 0.0006653025196597657, "loss": 0.0736, "theoretical_loss": 3.6369932430970695, "tokens_seen": 1126432768 }, { "epoch": 0.34, "learning_rate": 0.0006652222757181832, "loss": 0.077, "theoretical_loss": 3.6369150847752834, "tokens_seen": 1126694912 }, { "epoch": 0.34, "learning_rate": 0.000665142031776601, "loss": 0.0746, "theoretical_loss": 3.6368369497265913, "tokens_seen": 1126957056 }, { "epoch": 0.34, "objective/train/advantage_avg": -0.0004652831412386149, "objective/train/docs_used": 413086, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3972619771957397, "objective/train/original_loss": 1.3972619771957397, "objective/train/theoretical_loss": 3.6367588379386513, "objective/train/tokens_used": 1147679200, "objective/train/value_avg": -0.0119476318359375, "objective/train/value_loss": 0.0004500811919569969, "objective/train/value_max": -0.00012242794036865234, "objective/train/value_min": -0.638671875, "objective/train/value_reward_corr": 0.8368038617349711, "objective/train/value_std": 0.02716064453125, "objective/train/weight_avg": 0.9997386932373047, "objective/train/weighted_lm_loss": 1.3970850706100464, "objective/train/weights_max": 1.5594542026519775, "objective/train/weights_min": 0.36921927332878113, "theoretical_loss": 3.6367588379386513, "tokens_seen": 1127219200 }, { "epoch": 0.34, "learning_rate": 0.0006650617878350185, "loss": 0.0732, "theoretical_loss": 3.6367588379386513, "tokens_seen": 1127219200 }, { "epoch": 0.34, "learning_rate": 0.000664981543893436, "loss": 0.0754, "theoretical_loss": 3.636680749399133, "tokens_seen": 1127481344 }, { "epoch": 0.34, "learning_rate": 0.0006649012999518537, "loss": 0.0744, "theoretical_loss": 3.6366026840957133, "tokens_seen": 1127743488 }, { "epoch": 0.34, "learning_rate": 0.0006648210560102712, "loss": 0.076, "theoretical_loss": 3.636524642016079, "tokens_seen": 1128005632 }, { "epoch": 0.34, "learning_rate": 0.0006647408120686889, "loss": 0.0743, "theoretical_loss": 3.636446623147927, "tokens_seen": 1128267776 }, { "epoch": 0.34, "learning_rate": 0.0006646605681271064, "loss": 0.0761, "theoretical_loss": 3.6363686274789626, "tokens_seen": 1128529920 }, { "epoch": 0.34, "learning_rate": 0.000664580324185524, "loss": 0.0731, "theoretical_loss": 3.6362906549969014, "tokens_seen": 1128792064 }, { "epoch": 0.34, "learning_rate": 0.0006645000802439416, "loss": 0.0741, "theoretical_loss": 3.6362127056894673, "tokens_seen": 1129054208 }, { "epoch": 0.34, "learning_rate": 0.0006644198363023591, "loss": 0.0751, "theoretical_loss": 3.6361347795443955, "tokens_seen": 1129316352 }, { "epoch": 0.34, "learning_rate": 0.0006643395923607768, "loss": 0.0739, "theoretical_loss": 3.636056876549427, "tokens_seen": 1129578496 }, { "epoch": 0.34, "learning_rate": 0.0006642593484191944, "loss": 0.0738, "theoretical_loss": 3.6359789966923164, "tokens_seen": 1129840640 }, { "epoch": 0.34, "learning_rate": 0.000664179104477612, "loss": 0.0773, "theoretical_loss": 3.6359011399608243, "tokens_seen": 1130102784 }, { "epoch": 0.34, "learning_rate": 0.0006640988605360295, "loss": 0.0748, "theoretical_loss": 3.6358233063427225, "tokens_seen": 1130364928 }, { "epoch": 0.34, "learning_rate": 0.0006640186165944472, "loss": 0.0717, "theoretical_loss": 3.635745495825791, "tokens_seen": 1130627072 }, { "epoch": 0.34, "learning_rate": 0.0006639383726528647, "loss": 0.0721, "theoretical_loss": 3.63566770839782, "tokens_seen": 1130889216 }, { "epoch": 0.34, "learning_rate": 0.0006638581287112822, "loss": 0.0742, "theoretical_loss": 3.6355899440466075, "tokens_seen": 1131151360 }, { "epoch": 0.34, "learning_rate": 0.0006637778847696999, "loss": 0.0733, "theoretical_loss": 3.635512202759964, "tokens_seen": 1131413504 }, { "epoch": 0.34, "learning_rate": 0.0006636976408281174, "loss": 0.0738, "theoretical_loss": 3.635434484525704, "tokens_seen": 1131675648 }, { "epoch": 0.34, "learning_rate": 0.0006636173968865351, "loss": 0.0759, "theoretical_loss": 3.6353567893316567, "tokens_seen": 1131937792 }, { "epoch": 0.34, "learning_rate": 0.0006635371529449527, "loss": 0.0737, "theoretical_loss": 3.6352791171656573, "tokens_seen": 1132199936 }, { "epoch": 0.34, "learning_rate": 0.0006634569090033703, "loss": 0.0742, "theoretical_loss": 3.635201468015551, "tokens_seen": 1132462080 }, { "epoch": 0.34, "learning_rate": 0.0006633766650617879, "loss": 0.074, "theoretical_loss": 3.635123841869193, "tokens_seen": 1132724224 }, { "epoch": 0.34, "learning_rate": 0.0006632964211202054, "loss": 0.0751, "theoretical_loss": 3.6350462387144464, "tokens_seen": 1132986368 }, { "epoch": 0.34, "learning_rate": 0.000663216177178623, "loss": 0.0747, "theoretical_loss": 3.634968658539184, "tokens_seen": 1133248512 }, { "epoch": 0.34, "learning_rate": 0.0006631359332370406, "loss": 0.0768, "theoretical_loss": 3.6348911013312883, "tokens_seen": 1133510656 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0009396148379892111, "objective/train/docs_used": 415442, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4944310188293457, "objective/train/original_loss": 1.4944311380386353, "objective/train/theoretical_loss": 3.6348135670786506, "objective/train/tokens_used": 1154232800, "objective/train/value_avg": -0.0083770751953125, "objective/train/value_loss": 0.00016622063412796706, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.54296875, "objective/train/value_reward_corr": 0.7126787421781615, "objective/train/value_std": 0.01410675048828125, "objective/train/weight_avg": 1.0010205507278442, "objective/train/weighted_lm_loss": 1.4958319664001465, "objective/train/weights_max": 1.4884049892425537, "objective/train/weights_min": 0.5564592480659485, "theoretical_loss": 3.6348135670786506, "tokens_seen": 1133772800 }, { "epoch": 0.34, "learning_rate": 0.0006630556892954582, "loss": 0.0735, "theoretical_loss": 3.6348135670786506, "tokens_seen": 1133772800 }, { "epoch": 0.34, "learning_rate": 0.0006629754453538757, "loss": 0.0746, "theoretical_loss": 3.6347360557691712, "tokens_seen": 1134034944 }, { "epoch": 0.34, "learning_rate": 0.0006628952014122935, "loss": 0.0737, "theoretical_loss": 3.63465856739076, "tokens_seen": 1134297088 }, { "epoch": 0.34, "learning_rate": 0.000662814957470711, "loss": 0.0745, "theoretical_loss": 3.634581101931336, "tokens_seen": 1134559232 }, { "epoch": 0.34, "learning_rate": 0.0006627347135291286, "loss": 0.0753, "theoretical_loss": 3.6345036593788276, "tokens_seen": 1134821376 }, { "epoch": 0.34, "learning_rate": 0.0006626544695875462, "loss": 0.0758, "theoretical_loss": 3.6344262397211704, "tokens_seen": 1135083520 }, { "epoch": 0.34, "learning_rate": 0.0006625742256459637, "loss": 0.0753, "theoretical_loss": 3.6343488429463124, "tokens_seen": 1135345664 }, { "epoch": 0.34, "learning_rate": 0.0006624939817043813, "loss": 0.076, "theoretical_loss": 3.634271469042208, "tokens_seen": 1135607808 }, { "epoch": 0.34, "learning_rate": 0.0006624137377627989, "loss": 0.0727, "theoretical_loss": 3.634194117996822, "tokens_seen": 1135869952 }, { "epoch": 0.34, "learning_rate": 0.0006623334938212165, "loss": 0.0736, "theoretical_loss": 3.634116789798129, "tokens_seen": 1136132096 }, { "epoch": 0.34, "learning_rate": 0.000662253249879634, "loss": 0.0768, "theoretical_loss": 3.6340394844341097, "tokens_seen": 1136394240 }, { "epoch": 0.34, "learning_rate": 0.0006621730059380518, "loss": 0.0768, "theoretical_loss": 3.6339622018927575, "tokens_seen": 1136656384 }, { "epoch": 0.34, "learning_rate": 0.0006620927619964693, "loss": 0.0738, "theoretical_loss": 3.633884942162073, "tokens_seen": 1136918528 }, { "epoch": 0.34, "learning_rate": 0.0006620125180548869, "loss": 0.0733, "theoretical_loss": 3.6338077052300664, "tokens_seen": 1137180672 }, { "epoch": 0.34, "learning_rate": 0.0006619322741133045, "loss": 0.0725, "theoretical_loss": 3.633730491084756, "tokens_seen": 1137442816 }, { "epoch": 0.34, "learning_rate": 0.000661852030171722, "loss": 0.0746, "theoretical_loss": 3.6336532997141706, "tokens_seen": 1137704960 }, { "epoch": 0.34, "learning_rate": 0.0006617717862301397, "loss": 0.0779, "theoretical_loss": 3.6335761311063473, "tokens_seen": 1137967104 }, { "epoch": 0.34, "learning_rate": 0.0006616915422885572, "loss": 0.0763, "theoretical_loss": 3.633498985249332, "tokens_seen": 1138229248 }, { "epoch": 0.35, "learning_rate": 0.0006616112983469748, "loss": 0.0738, "theoretical_loss": 3.63342186213118, "tokens_seen": 1138491392 }, { "epoch": 0.35, "learning_rate": 0.0006615310544053924, "loss": 0.0735, "theoretical_loss": 3.6333447617399557, "tokens_seen": 1138753536 }, { "epoch": 0.35, "learning_rate": 0.00066145081046381, "loss": 0.0751, "theoretical_loss": 3.6332676840637324, "tokens_seen": 1139015680 }, { "epoch": 0.35, "learning_rate": 0.0006613705665222276, "loss": 0.0758, "theoretical_loss": 3.633190629090592, "tokens_seen": 1139277824 }, { "epoch": 0.35, "learning_rate": 0.0006612903225806452, "loss": 0.075, "theoretical_loss": 3.6331135968086263, "tokens_seen": 1139539968 }, { "epoch": 0.35, "learning_rate": 0.0006612100786390628, "loss": 0.0738, "theoretical_loss": 3.633036587205935, "tokens_seen": 1139802112 }, { "epoch": 0.35, "learning_rate": 0.0006611298346974803, "loss": 0.0745, "theoretical_loss": 3.6329596002706275, "tokens_seen": 1140064256 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0012790506007149816, "objective/train/docs_used": 417664, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.491121530532837, "objective/train/original_loss": 1.491121530532837, "objective/train/theoretical_loss": 3.632882635990822, "objective/train/tokens_used": 1160786400, "objective/train/value_avg": -0.006160736083984375, "objective/train/value_loss": 0.00011857429490191862, "objective/train/value_max": -8.684396743774414e-05, "objective/train/value_min": -0.5185546875, "objective/train/value_reward_corr": 0.7576070544182378, "objective/train/value_std": 0.01470184326171875, "objective/train/weight_avg": 1.0013362169265747, "objective/train/weighted_lm_loss": 1.4934520721435547, "objective/train/weights_max": 1.1257035732269287, "objective/train/weights_min": 0.4068576395511627, "theoretical_loss": 3.632882635990822, "tokens_seen": 1140326400 }, { "epoch": 0.35, "learning_rate": 0.000661049590755898, "loss": 0.0743, "theoretical_loss": 3.632882635990822, "tokens_seen": 1140326400 }, { "epoch": 0.35, "learning_rate": 0.0006609693468143155, "loss": 0.0763, "theoretical_loss": 3.632805694354646, "tokens_seen": 1140588544 }, { "epoch": 0.35, "learning_rate": 0.0006608891028727331, "loss": 0.0751, "theoretical_loss": 3.6327287753502358, "tokens_seen": 1140850688 }, { "epoch": 0.35, "learning_rate": 0.0006608088589311507, "loss": 0.0752, "theoretical_loss": 3.632651878965735, "tokens_seen": 1141112832 }, { "epoch": 0.35, "learning_rate": 0.0006607286149895682, "loss": 0.0735, "theoretical_loss": 3.632575005189299, "tokens_seen": 1141374976 }, { "epoch": 0.35, "learning_rate": 0.000660648371047986, "loss": 0.0763, "theoretical_loss": 3.6324981540090895, "tokens_seen": 1141637120 }, { "epoch": 0.35, "learning_rate": 0.0006605681271064035, "loss": 0.0741, "theoretical_loss": 3.6324213254132793, "tokens_seen": 1141899264 }, { "epoch": 0.35, "learning_rate": 0.0006604878831648211, "loss": 0.0758, "theoretical_loss": 3.632344519390049, "tokens_seen": 1142161408 }, { "epoch": 0.35, "learning_rate": 0.0006604076392232387, "loss": 0.0725, "theoretical_loss": 3.632267735927588, "tokens_seen": 1142423552 }, { "epoch": 0.35, "learning_rate": 0.0006603273952816562, "loss": 0.0745, "theoretical_loss": 3.632190975014094, "tokens_seen": 1142685696 }, { "epoch": 0.35, "learning_rate": 0.0006602471513400738, "loss": 0.0721, "theoretical_loss": 3.6321142366377757, "tokens_seen": 1142947840 }, { "epoch": 0.35, "learning_rate": 0.0006601669073984914, "loss": 0.0719, "theoretical_loss": 3.6320375207868483, "tokens_seen": 1143209984 }, { "epoch": 0.35, "learning_rate": 0.000660086663456909, "loss": 0.073, "theoretical_loss": 3.6319608274495376, "tokens_seen": 1143472128 }, { "epoch": 0.35, "learning_rate": 0.0006600064195153265, "loss": 0.0744, "theoretical_loss": 3.6318841566140767, "tokens_seen": 1143734272 }, { "epoch": 0.35, "learning_rate": 0.0006599261755737443, "loss": 0.0722, "theoretical_loss": 3.63180750826871, "tokens_seen": 1143996416 }, { "epoch": 0.35, "learning_rate": 0.0006598459316321618, "loss": 0.0701, "theoretical_loss": 3.6317308824016874, "tokens_seen": 1144258560 }, { "epoch": 0.35, "learning_rate": 0.0006597656876905794, "loss": 0.0707, "theoretical_loss": 3.6316542790012702, "tokens_seen": 1144520704 }, { "epoch": 0.35, "learning_rate": 0.000659685443748997, "loss": 0.0717, "theoretical_loss": 3.631577698055727, "tokens_seen": 1144782848 }, { "epoch": 0.35, "learning_rate": 0.0006596051998074145, "loss": 0.0705, "theoretical_loss": 3.631501139553337, "tokens_seen": 1145044992 }, { "epoch": 0.35, "learning_rate": 0.0006595249558658322, "loss": 0.0738, "theoretical_loss": 3.6314246034823867, "tokens_seen": 1145307136 }, { "epoch": 0.35, "learning_rate": 0.0006594447119242497, "loss": 0.075, "theoretical_loss": 3.631348089831171, "tokens_seen": 1145569280 }, { "epoch": 0.35, "learning_rate": 0.0006593644679826673, "loss": 0.0737, "theoretical_loss": 3.631271598587995, "tokens_seen": 1145831424 }, { "epoch": 0.35, "learning_rate": 0.0006592842240410849, "loss": 0.0739, "theoretical_loss": 3.631195129741172, "tokens_seen": 1146093568 }, { "epoch": 0.35, "learning_rate": 0.0006592039800995026, "loss": 0.0747, "theoretical_loss": 3.631118683279024, "tokens_seen": 1146355712 }, { "epoch": 0.35, "learning_rate": 0.0006591237361579201, "loss": 0.0734, "theoretical_loss": 3.6310422591898814, "tokens_seen": 1146617856 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0006740565877407789, "objective/train/docs_used": 419954, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3567689657211304, "objective/train/original_loss": 1.3567688465118408, "objective/train/theoretical_loss": 3.630965857462084, "objective/train/tokens_used": 1167340000, "objective/train/value_avg": -0.00856781005859375, "objective/train/value_loss": 0.0002693459973670542, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.62451171875, "objective/train/value_reward_corr": 0.6644929738275986, "objective/train/value_std": 0.01461029052734375, "objective/train/weight_avg": 1.0007959604263306, "objective/train/weighted_lm_loss": 1.357459545135498, "objective/train/weights_max": 1.5120787620544434, "objective/train/weights_min": 0.3692566156387329, "theoretical_loss": 3.630965857462084, "tokens_seen": 1146880000 }, { "epoch": 0.35, "learning_rate": 0.0006590434922163377, "loss": 0.0715, "theoretical_loss": 3.630965857462084, "tokens_seen": 1146880000 }, { "epoch": 0.35, "learning_rate": 0.0006589632482747553, "loss": 0.0742, "theoretical_loss": 3.6308894780839798, "tokens_seen": 1147142144 }, { "epoch": 0.35, "learning_rate": 0.0006588830043331728, "loss": 0.0715, "theoretical_loss": 3.630813121043926, "tokens_seen": 1147404288 }, { "epoch": 0.35, "learning_rate": 0.0006588027603915905, "loss": 0.0738, "theoretical_loss": 3.630736786330288, "tokens_seen": 1147666432 }, { "epoch": 0.35, "learning_rate": 0.000658722516450008, "loss": 0.0747, "theoretical_loss": 3.630660473931441, "tokens_seen": 1147928576 }, { "epoch": 0.35, "learning_rate": 0.0006586422725084256, "loss": 0.0754, "theoretical_loss": 3.6305841838357673, "tokens_seen": 1148190720 }, { "epoch": 0.35, "learning_rate": 0.0006585620285668432, "loss": 0.0704, "theoretical_loss": 3.630507916031659, "tokens_seen": 1148452864 }, { "epoch": 0.35, "learning_rate": 0.0006584817846252607, "loss": 0.0744, "theoretical_loss": 3.630431670507517, "tokens_seen": 1148715008 }, { "epoch": 0.35, "learning_rate": 0.0006584015406836785, "loss": 0.0741, "theoretical_loss": 3.6303554472517496, "tokens_seen": 1148977152 }, { "epoch": 0.35, "learning_rate": 0.000658321296742096, "loss": 0.0715, "theoretical_loss": 3.6302792462527758, "tokens_seen": 1149239296 }, { "epoch": 0.35, "learning_rate": 0.0006582410528005136, "loss": 0.071, "theoretical_loss": 3.6302030674990213, "tokens_seen": 1149501440 }, { "epoch": 0.35, "learning_rate": 0.0006581608088589312, "loss": 0.075, "theoretical_loss": 3.6301269109789214, "tokens_seen": 1149763584 }, { "epoch": 0.35, "learning_rate": 0.0006580805649173488, "loss": 0.0717, "theoretical_loss": 3.63005077668092, "tokens_seen": 1150025728 }, { "epoch": 0.35, "learning_rate": 0.0006580003209757663, "loss": 0.0723, "theoretical_loss": 3.62997466459347, "tokens_seen": 1150287872 }, { "epoch": 0.35, "learning_rate": 0.0006579200770341839, "loss": 0.0704, "theoretical_loss": 3.629898574705031, "tokens_seen": 1150550016 }, { "epoch": 0.35, "learning_rate": 0.0006578398330926015, "loss": 0.0734, "theoretical_loss": 3.629822507004075, "tokens_seen": 1150812160 }, { "epoch": 0.35, "learning_rate": 0.000657759589151019, "loss": 0.0721, "theoretical_loss": 3.629746461479079, "tokens_seen": 1151074304 }, { "epoch": 0.35, "learning_rate": 0.0006576793452094368, "loss": 0.0714, "theoretical_loss": 3.62967043811853, "tokens_seen": 1151336448 }, { "epoch": 0.35, "learning_rate": 0.0006575991012678543, "loss": 0.0736, "theoretical_loss": 3.629594436910924, "tokens_seen": 1151598592 }, { "epoch": 0.35, "learning_rate": 0.0006575188573262719, "loss": 0.0727, "theoretical_loss": 3.6295184578447643, "tokens_seen": 1151860736 }, { "epoch": 0.35, "learning_rate": 0.0006574386133846895, "loss": 0.0698, "theoretical_loss": 3.6294425009085645, "tokens_seen": 1152122880 }, { "epoch": 0.35, "learning_rate": 0.000657358369443107, "loss": 0.073, "theoretical_loss": 3.6293665660908454, "tokens_seen": 1152385024 }, { "epoch": 0.35, "learning_rate": 0.0006572781255015246, "loss": 0.0718, "theoretical_loss": 3.6292906533801372, "tokens_seen": 1152647168 }, { "epoch": 0.35, "learning_rate": 0.0006571978815599422, "loss": 0.074, "theoretical_loss": 3.6292147627649776, "tokens_seen": 1152909312 }, { "epoch": 0.35, "learning_rate": 0.0006571176376183598, "loss": 0.0751, "theoretical_loss": 3.6291388942339147, "tokens_seen": 1153171456 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0011869670124724507, "objective/train/docs_used": 422447, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4943604469299316, "objective/train/original_loss": 1.4943604469299316, "objective/train/theoretical_loss": 3.6290630477755026, "objective/train/tokens_used": 1173893600, "objective/train/value_avg": -0.0102081298828125, "objective/train/value_loss": 0.00040764882578514516, "objective/train/value_max": -7.31348991394043e-05, "objective/train/value_min": -0.70703125, "objective/train/value_reward_corr": 0.6485598952753047, "objective/train/value_std": 0.0184478759765625, "objective/train/weight_avg": 1.0013765096664429, "objective/train/weighted_lm_loss": 1.496140718460083, "objective/train/weights_max": 1.7507752180099487, "objective/train/weights_min": 0.3769771456718445, "theoretical_loss": 3.6290630477755026, "tokens_seen": 1153433600 }, { "epoch": 0.35, "learning_rate": 0.0006570373936767774, "loss": 0.0714, "theoretical_loss": 3.6290630477755026, "tokens_seen": 1153433600 }, { "epoch": 0.35, "learning_rate": 0.0006569571497351951, "loss": 0.0716, "theoretical_loss": 3.6289872233783065, "tokens_seen": 1153695744 }, { "epoch": 0.35, "learning_rate": 0.0006568769057936126, "loss": 0.0704, "theoretical_loss": 3.6289114210308977, "tokens_seen": 1153957888 }, { "epoch": 0.35, "learning_rate": 0.0006567966618520302, "loss": 0.0727, "theoretical_loss": 3.628835640721859, "tokens_seen": 1154220032 }, { "epoch": 0.35, "learning_rate": 0.0006567164179104478, "loss": 0.0725, "theoretical_loss": 3.6287598824397787, "tokens_seen": 1154482176 }, { "epoch": 0.35, "learning_rate": 0.0006566361739688653, "loss": 0.0745, "theoretical_loss": 3.6286841461732546, "tokens_seen": 1154744320 }, { "epoch": 0.35, "learning_rate": 0.000656555930027283, "loss": 0.0711, "theoretical_loss": 3.6286084319108944, "tokens_seen": 1155006464 }, { "epoch": 0.35, "learning_rate": 0.0006564756860857005, "loss": 0.07, "theoretical_loss": 3.628532739641312, "tokens_seen": 1155268608 }, { "epoch": 0.35, "learning_rate": 0.0006563954421441181, "loss": 0.0699, "theoretical_loss": 3.6284570693531317, "tokens_seen": 1155530752 }, { "epoch": 0.35, "learning_rate": 0.0006563151982025357, "loss": 0.0715, "theoretical_loss": 3.6283814210349847, "tokens_seen": 1155792896 }, { "epoch": 0.35, "learning_rate": 0.0006562349542609534, "loss": 0.0707, "theoretical_loss": 3.628305794675512, "tokens_seen": 1156055040 }, { "epoch": 0.35, "learning_rate": 0.0006561547103193709, "loss": 0.0711, "theoretical_loss": 3.6282301902633627, "tokens_seen": 1156317184 }, { "epoch": 0.35, "learning_rate": 0.0006560744663777885, "loss": 0.069, "theoretical_loss": 3.628154607787194, "tokens_seen": 1156579328 }, { "epoch": 0.35, "learning_rate": 0.0006559942224362061, "loss": 0.0703, "theoretical_loss": 3.6280790472356705, "tokens_seen": 1156841472 }, { "epoch": 0.35, "learning_rate": 0.0006559139784946236, "loss": 0.0698, "theoretical_loss": 3.628003508597468, "tokens_seen": 1157103616 }, { "epoch": 0.35, "learning_rate": 0.0006558337345530413, "loss": 0.0734, "theoretical_loss": 3.6279279918612675, "tokens_seen": 1157365760 }, { "epoch": 0.35, "learning_rate": 0.0006557534906114588, "loss": 0.0704, "theoretical_loss": 3.6278524970157613, "tokens_seen": 1157627904 }, { "epoch": 0.35, "learning_rate": 0.0006556732466698765, "loss": 0.0729, "theoretical_loss": 3.6277770240496476, "tokens_seen": 1157890048 }, { "epoch": 0.35, "learning_rate": 0.000655593002728294, "loss": 0.0693, "theoretical_loss": 3.6277015729516355, "tokens_seen": 1158152192 }, { "epoch": 0.35, "learning_rate": 0.0006555127587867115, "loss": 0.0708, "theoretical_loss": 3.6276261437104402, "tokens_seen": 1158414336 }, { "epoch": 0.35, "learning_rate": 0.0006554325148451293, "loss": 0.0739, "theoretical_loss": 3.6275507363147868, "tokens_seen": 1158676480 }, { "epoch": 0.35, "learning_rate": 0.0006553522709035468, "loss": 0.0713, "theoretical_loss": 3.6274753507534077, "tokens_seen": 1158938624 }, { "epoch": 0.35, "learning_rate": 0.0006552720269619644, "loss": 0.0745, "theoretical_loss": 3.6273999870150444, "tokens_seen": 1159200768 }, { "epoch": 0.35, "learning_rate": 0.000655191783020382, "loss": 0.0682, "theoretical_loss": 3.627324645088446, "tokens_seen": 1159462912 }, { "epoch": 0.35, "learning_rate": 0.0006551115390787996, "loss": 0.0721, "theoretical_loss": 3.627249324962371, "tokens_seen": 1159725056 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0011229667579755187, "objective/train/docs_used": 424758, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5378754138946533, "objective/train/original_loss": 1.5378755331039429, "objective/train/theoretical_loss": 3.6271740266255854, "objective/train/tokens_used": 1180447200, "objective/train/value_avg": -0.0070648193359375, "objective/train/value_loss": 0.00018219789490103722, "objective/train/value_max": -0.00013446807861328125, "objective/train/value_min": -0.4599609375, "objective/train/value_reward_corr": 0.6781258515515318, "objective/train/value_std": 0.01233673095703125, "objective/train/weight_avg": 1.0012083053588867, "objective/train/weighted_lm_loss": 1.5396517515182495, "objective/train/weights_max": 1.1805914640426636, "objective/train/weights_min": 0.37442609667778015, "theoretical_loss": 3.6271740266255854, "tokens_seen": 1159987200 }, { "epoch": 0.35, "learning_rate": 0.0006550312951372171, "loss": 0.0719, "theoretical_loss": 3.6271740266255854, "tokens_seen": 1159987200 }, { "epoch": 0.35, "learning_rate": 0.0006549510511956347, "loss": 0.0728, "theoretical_loss": 3.6270987500668648, "tokens_seen": 1160249344 }, { "epoch": 0.35, "learning_rate": 0.0006548708072540523, "loss": 0.0726, "theoretical_loss": 3.6270234952749902, "tokens_seen": 1160511488 }, { "epoch": 0.35, "learning_rate": 0.0006547905633124698, "loss": 0.0711, "theoretical_loss": 3.6269482622387548, "tokens_seen": 1160773632 }, { "epoch": 0.35, "learning_rate": 0.0006547103193708876, "loss": 0.0723, "theoretical_loss": 3.6268730509469567, "tokens_seen": 1161035776 }, { "epoch": 0.35, "learning_rate": 0.0006546300754293051, "loss": 0.0698, "theoretical_loss": 3.626797861388404, "tokens_seen": 1161297920 }, { "epoch": 0.35, "learning_rate": 0.0006545498314877228, "loss": 0.073, "theoretical_loss": 3.6267226935519132, "tokens_seen": 1161560064 }, { "epoch": 0.35, "learning_rate": 0.0006544695875461403, "loss": 0.0723, "theoretical_loss": 3.626647547426309, "tokens_seen": 1161822208 }, { "epoch": 0.35, "learning_rate": 0.0006543893436045578, "loss": 0.0729, "theoretical_loss": 3.6265724230004226, "tokens_seen": 1162084352 }, { "epoch": 0.35, "learning_rate": 0.0006543090996629755, "loss": 0.0711, "theoretical_loss": 3.6264973202630966, "tokens_seen": 1162346496 }, { "epoch": 0.35, "learning_rate": 0.000654228855721393, "loss": 0.0696, "theoretical_loss": 3.6264222392031797, "tokens_seen": 1162608640 }, { "epoch": 0.35, "learning_rate": 0.0006541486117798106, "loss": 0.0696, "theoretical_loss": 3.6263471798095286, "tokens_seen": 1162870784 }, { "epoch": 0.35, "learning_rate": 0.0006540683678382282, "loss": 0.0749, "theoretical_loss": 3.6262721420710093, "tokens_seen": 1163132928 }, { "epoch": 0.35, "learning_rate": 0.0006539881238966459, "loss": 0.0707, "theoretical_loss": 3.6261971259764962, "tokens_seen": 1163395072 }, { "epoch": 0.35, "learning_rate": 0.0006539078799550634, "loss": 0.0694, "theoretical_loss": 3.626122131514871, "tokens_seen": 1163657216 }, { "epoch": 0.35, "learning_rate": 0.000653827636013481, "loss": 0.0724, "theoretical_loss": 3.626047158675024, "tokens_seen": 1163919360 }, { "epoch": 0.35, "learning_rate": 0.0006537473920718986, "loss": 0.0701, "theoretical_loss": 3.625972207445854, "tokens_seen": 1164181504 }, { "epoch": 0.35, "learning_rate": 0.0006536671481303161, "loss": 0.0686, "theoretical_loss": 3.625897277816267, "tokens_seen": 1164443648 }, { "epoch": 0.35, "learning_rate": 0.0006535869041887338, "loss": 0.0709, "theoretical_loss": 3.625822369775179, "tokens_seen": 1164705792 }, { "epoch": 0.35, "learning_rate": 0.0006535066602471513, "loss": 0.0731, "theoretical_loss": 3.6257474833115113, "tokens_seen": 1164967936 }, { "epoch": 0.35, "learning_rate": 0.0006534264163055689, "loss": 0.0726, "theoretical_loss": 3.625672618414198, "tokens_seen": 1165230080 }, { "epoch": 0.35, "learning_rate": 0.0006533461723639865, "loss": 0.0714, "theoretical_loss": 3.6255977750721753, "tokens_seen": 1165492224 }, { "epoch": 0.35, "learning_rate": 0.0006532659284224042, "loss": 0.0747, "theoretical_loss": 3.6255229532743933, "tokens_seen": 1165754368 }, { "epoch": 0.35, "learning_rate": 0.0006531856844808218, "loss": 0.0712, "theoretical_loss": 3.625448153009807, "tokens_seen": 1166016512 }, { "epoch": 0.35, "learning_rate": 0.0006531054405392393, "loss": 0.0693, "theoretical_loss": 3.6253733742673795, "tokens_seen": 1166278656 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.002040127059444785, "objective/train/docs_used": 427016, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.472293734550476, "objective/train/original_loss": 1.4722938537597656, "objective/train/theoretical_loss": 3.625298617036084, "objective/train/tokens_used": 1187000800, "objective/train/value_avg": -0.01036834716796875, "objective/train/value_loss": 0.0002555120154283941, "objective/train/value_max": -0.00013339519500732422, "objective/train/value_min": -0.6376953125, "objective/train/value_reward_corr": 0.7320040769195669, "objective/train/value_std": 0.0182952880859375, "objective/train/weight_avg": 1.0021584033966064, "objective/train/weighted_lm_loss": 1.475948452949524, "objective/train/weights_max": 1.3658320903778076, "objective/train/weights_min": 0.3710109293460846, "theoretical_loss": 3.625298617036084, "tokens_seen": 1166540800 }, { "epoch": 0.35, "learning_rate": 0.0006530251965976569, "loss": 0.0744, "theoretical_loss": 3.625298617036084, "tokens_seen": 1166540800 }, { "epoch": 0.35, "learning_rate": 0.0006529449526560745, "loss": 0.0727, "theoretical_loss": 3.6252238813049, "tokens_seen": 1166802944 }, { "epoch": 0.35, "learning_rate": 0.0006528647087144921, "loss": 0.073, "theoretical_loss": 3.6251491670628155, "tokens_seen": 1167065088 }, { "epoch": 0.35, "learning_rate": 0.0006527844647729096, "loss": 0.071, "theoretical_loss": 3.6250744742988275, "tokens_seen": 1167327232 }, { "epoch": 0.35, "learning_rate": 0.0006527042208313273, "loss": 0.0727, "theoretical_loss": 3.6249998030019404, "tokens_seen": 1167589376 }, { "epoch": 0.35, "learning_rate": 0.0006526239768897448, "loss": 0.0707, "theoretical_loss": 3.6249251531611666, "tokens_seen": 1167851520 }, { "epoch": 0.35, "learning_rate": 0.0006525437329481623, "loss": 0.0715, "theoretical_loss": 3.6248505247655265, "tokens_seen": 1168113664 }, { "epoch": 0.35, "learning_rate": 0.0006524634890065801, "loss": 0.0731, "theoretical_loss": 3.6247759178040493, "tokens_seen": 1168375808 }, { "epoch": 0.35, "learning_rate": 0.0006523832450649976, "loss": 0.0703, "theoretical_loss": 3.624701332265772, "tokens_seen": 1168637952 }, { "epoch": 0.35, "learning_rate": 0.0006523030011234152, "loss": 0.0703, "theoretical_loss": 3.6246267681397386, "tokens_seen": 1168900096 }, { "epoch": 0.35, "learning_rate": 0.0006522227571818328, "loss": 0.0715, "theoretical_loss": 3.624552225415003, "tokens_seen": 1169162240 }, { "epoch": 0.35, "learning_rate": 0.0006521425132402504, "loss": 0.0691, "theoretical_loss": 3.6244777040806255, "tokens_seen": 1169424384 }, { "epoch": 0.35, "learning_rate": 0.0006520622692986679, "loss": 0.0722, "theoretical_loss": 3.624403204125676, "tokens_seen": 1169686528 }, { "epoch": 0.35, "learning_rate": 0.0006519820253570855, "loss": 0.0748, "theoretical_loss": 3.6243287255392307, "tokens_seen": 1169948672 }, { "epoch": 0.35, "learning_rate": 0.0006519017814155031, "loss": 0.0722, "theoretical_loss": 3.624254268310375, "tokens_seen": 1170210816 }, { "epoch": 0.35, "learning_rate": 0.0006518215374739207, "loss": 0.0697, "theoretical_loss": 3.6241798324282017, "tokens_seen": 1170472960 }, { "epoch": 0.35, "learning_rate": 0.0006517412935323384, "loss": 0.0733, "theoretical_loss": 3.624105417881813, "tokens_seen": 1170735104 }, { "epoch": 0.35, "learning_rate": 0.0006516610495907559, "loss": 0.0729, "theoretical_loss": 3.624031024660317, "tokens_seen": 1170997248 }, { "epoch": 0.35, "learning_rate": 0.0006515808056491736, "loss": 0.0734, "theoretical_loss": 3.6239566527528306, "tokens_seen": 1171259392 }, { "epoch": 0.36, "learning_rate": 0.0006515005617075911, "loss": 0.0714, "theoretical_loss": 3.6238823021484796, "tokens_seen": 1171521536 }, { "epoch": 0.36, "learning_rate": 0.0006514203177660086, "loss": 0.0728, "theoretical_loss": 3.6238079728363974, "tokens_seen": 1171783680 }, { "epoch": 0.36, "learning_rate": 0.0006513400738244263, "loss": 0.0711, "theoretical_loss": 3.6237336648057243, "tokens_seen": 1172045824 }, { "epoch": 0.36, "learning_rate": 0.0006512598298828438, "loss": 0.0691, "theoretical_loss": 3.62365937804561, "tokens_seen": 1172307968 }, { "epoch": 0.36, "learning_rate": 0.0006511795859412614, "loss": 0.0682, "theoretical_loss": 3.6235851125452108, "tokens_seen": 1172570112 }, { "epoch": 0.36, "learning_rate": 0.000651099341999679, "loss": 0.072, "theoretical_loss": 3.6235108682936916, "tokens_seen": 1172832256 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0011571807553991675, "objective/train/docs_used": 429267, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3518650531768799, "objective/train/original_loss": 1.3518651723861694, "objective/train/theoretical_loss": 3.623436645280226, "objective/train/tokens_used": 1193554400, "objective/train/value_avg": -0.00867462158203125, "objective/train/value_loss": 0.00022238181554712355, "objective/train/value_max": -8.612871170043945e-05, "objective/train/value_min": -0.67236328125, "objective/train/value_reward_corr": 0.6452302653535441, "objective/train/value_std": 0.014129638671875, "objective/train/weight_avg": 1.0012578964233398, "objective/train/weighted_lm_loss": 1.3529399633407593, "objective/train/weights_max": 1.2395051717758179, "objective/train/weights_min": 0.37000179290771484, "theoretical_loss": 3.623436645280226, "tokens_seen": 1173094400 }, { "epoch": 0.36, "learning_rate": 0.0006510190980580967, "loss": 0.0723, "theoretical_loss": 3.623436645280226, "tokens_seen": 1173094400 }, { "epoch": 0.36, "learning_rate": 0.0006509388541165142, "loss": 0.0724, "theoretical_loss": 3.6233624434939946, "tokens_seen": 1173356544 }, { "epoch": 0.36, "learning_rate": 0.0006508586101749318, "loss": 0.07, "theoretical_loss": 3.623288262924186, "tokens_seen": 1173618688 }, { "epoch": 0.36, "learning_rate": 0.0006507783662333494, "loss": 0.0738, "theoretical_loss": 3.623214103559997, "tokens_seen": 1173880832 }, { "epoch": 0.36, "learning_rate": 0.000650698122291767, "loss": 0.0719, "theoretical_loss": 3.6231399653906315, "tokens_seen": 1174142976 }, { "epoch": 0.36, "learning_rate": 0.0006506178783501846, "loss": 0.0686, "theoretical_loss": 3.623065848405303, "tokens_seen": 1174405120 }, { "epoch": 0.36, "learning_rate": 0.0006505376344086021, "loss": 0.0717, "theoretical_loss": 3.622991752593231, "tokens_seen": 1174667264 }, { "epoch": 0.36, "learning_rate": 0.0006504573904670198, "loss": 0.0724, "theoretical_loss": 3.6229176779436445, "tokens_seen": 1174929408 }, { "epoch": 0.36, "learning_rate": 0.0006503771465254373, "loss": 0.0705, "theoretical_loss": 3.6228436244457796, "tokens_seen": 1175191552 }, { "epoch": 0.36, "learning_rate": 0.000650296902583855, "loss": 0.0719, "theoretical_loss": 3.6227695920888796, "tokens_seen": 1175453696 }, { "epoch": 0.36, "learning_rate": 0.0006502166586422726, "loss": 0.0703, "theoretical_loss": 3.622695580862197, "tokens_seen": 1175715840 }, { "epoch": 0.36, "learning_rate": 0.0006501364147006901, "loss": 0.0703, "theoretical_loss": 3.6226215907549912, "tokens_seen": 1175977984 }, { "epoch": 0.36, "learning_rate": 0.0006500561707591077, "loss": 0.0717, "theoretical_loss": 3.6225476217565307, "tokens_seen": 1176240128 }, { "epoch": 0.36, "learning_rate": 0.0006499759268175253, "loss": 0.0711, "theoretical_loss": 3.6224736738560894, "tokens_seen": 1176502272 }, { "epoch": 0.36, "learning_rate": 0.0006498956828759429, "loss": 0.0754, "theoretical_loss": 3.6223997470429516, "tokens_seen": 1176764416 }, { "epoch": 0.36, "learning_rate": 0.0006498154389343604, "loss": 0.0713, "theoretical_loss": 3.6223258413064086, "tokens_seen": 1177026560 }, { "epoch": 0.36, "learning_rate": 0.0006497351949927781, "loss": 0.0733, "theoretical_loss": 3.6222519566357585, "tokens_seen": 1177288704 }, { "epoch": 0.36, "learning_rate": 0.0006496549510511956, "loss": 0.071, "theoretical_loss": 3.6221780930203096, "tokens_seen": 1177550848 }, { "epoch": 0.36, "learning_rate": 0.0006495747071096131, "loss": 0.073, "theoretical_loss": 3.6221042504493743, "tokens_seen": 1177812992 }, { "epoch": 0.36, "learning_rate": 0.0006494944631680309, "loss": 0.0736, "theoretical_loss": 3.622030428912276, "tokens_seen": 1178075136 }, { "epoch": 0.36, "learning_rate": 0.0006494142192264484, "loss": 0.0699, "theoretical_loss": 3.6219566283983458, "tokens_seen": 1178337280 }, { "epoch": 0.36, "learning_rate": 0.0006493339752848661, "loss": 0.0747, "theoretical_loss": 3.6218828488969197, "tokens_seen": 1178599424 }, { "epoch": 0.36, "learning_rate": 0.0006492537313432836, "loss": 0.0707, "theoretical_loss": 3.6218090903973446, "tokens_seen": 1178861568 }, { "epoch": 0.36, "learning_rate": 0.0006491734874017012, "loss": 0.0765, "theoretical_loss": 3.621735352888974, "tokens_seen": 1179123712 }, { "epoch": 0.36, "learning_rate": 0.0006490932434601188, "loss": 0.0787, "theoretical_loss": 3.621661636361169, "tokens_seen": 1179385856 }, { "epoch": 0.36, "objective/train/advantage_avg": -0.000662869424559176, "objective/train/docs_used": 431781, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5813153982162476, "objective/train/original_loss": 1.5813153982162476, "objective/train/theoretical_loss": 3.6215879408032987, "objective/train/tokens_used": 1200108000, "objective/train/value_avg": -0.00632476806640625, "objective/train/value_loss": 0.0001973478210857138, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.392822265625, "objective/train/value_reward_corr": 0.6522775297311824, "objective/train/value_std": 0.01153564453125, "objective/train/weight_avg": 0.9994257688522339, "objective/train/weighted_lm_loss": 1.5801219940185547, "objective/train/weights_max": 1.2123867273330688, "objective/train/weights_min": 0.3755246698856354, "theoretical_loss": 3.6215879408032987, "tokens_seen": 1179648000 }, { "epoch": 0.36, "learning_rate": 0.0006490129995185363, "loss": 0.0741, "theoretical_loss": 3.6215879408032987, "tokens_seen": 1179648000 }, { "epoch": 0.36, "learning_rate": 0.0006489327555769539, "loss": 0.0739, "theoretical_loss": 3.6215142662047395, "tokens_seen": 1179910144 }, { "epoch": 0.36, "learning_rate": 0.0006488525116353715, "loss": 0.074, "theoretical_loss": 3.621440612554876, "tokens_seen": 1180172288 }, { "epoch": 0.36, "learning_rate": 0.0006487722676937892, "loss": 0.0739, "theoretical_loss": 3.6213669798431005, "tokens_seen": 1180434432 }, { "epoch": 0.36, "learning_rate": 0.0006486920237522067, "loss": 0.0723, "theoretical_loss": 3.621293368058813, "tokens_seen": 1180696576 }, { "epoch": 0.36, "learning_rate": 0.0006486117798106244, "loss": 0.074, "theoretical_loss": 3.621219777191421, "tokens_seen": 1180958720 }, { "epoch": 0.36, "learning_rate": 0.0006485315358690419, "loss": 0.0744, "theoretical_loss": 3.62114620723034, "tokens_seen": 1181220864 }, { "epoch": 0.36, "learning_rate": 0.0006484512919274594, "loss": 0.0737, "theoretical_loss": 3.621072658164993, "tokens_seen": 1181483008 }, { "epoch": 0.36, "learning_rate": 0.0006483710479858771, "loss": 0.0735, "theoretical_loss": 3.6209991299848108, "tokens_seen": 1181745152 }, { "epoch": 0.36, "learning_rate": 0.0006482908040442946, "loss": 0.0752, "theoretical_loss": 3.620925622679232, "tokens_seen": 1182007296 }, { "epoch": 0.36, "learning_rate": 0.0006482105601027123, "loss": 0.074, "theoretical_loss": 3.620852136237702, "tokens_seen": 1182269440 }, { "epoch": 0.36, "learning_rate": 0.0006481303161611298, "loss": 0.074, "theoretical_loss": 3.620778670649676, "tokens_seen": 1182531584 }, { "epoch": 0.36, "learning_rate": 0.0006480500722195475, "loss": 0.0709, "theoretical_loss": 3.620705225904614, "tokens_seen": 1182793728 }, { "epoch": 0.36, "learning_rate": 0.0006479698282779651, "loss": 0.0733, "theoretical_loss": 3.620631801991987, "tokens_seen": 1183055872 }, { "epoch": 0.36, "learning_rate": 0.0006478895843363826, "loss": 0.075, "theoretical_loss": 3.6205583989012697, "tokens_seen": 1183318016 }, { "epoch": 0.36, "learning_rate": 0.0006478093403948002, "loss": 0.0748, "theoretical_loss": 3.6204850166219478, "tokens_seen": 1183580160 }, { "epoch": 0.36, "learning_rate": 0.0006477290964532178, "loss": 0.0699, "theoretical_loss": 3.6204116551435126, "tokens_seen": 1183842304 }, { "epoch": 0.36, "learning_rate": 0.0006476488525116354, "loss": 0.0745, "theoretical_loss": 3.620338314455465, "tokens_seen": 1184104448 }, { "epoch": 0.36, "learning_rate": 0.0006475686085700529, "loss": 0.0742, "theoretical_loss": 3.6202649945473113, "tokens_seen": 1184366592 }, { "epoch": 0.36, "learning_rate": 0.0006474883646284706, "loss": 0.0724, "theoretical_loss": 3.6201916954085664, "tokens_seen": 1184628736 }, { "epoch": 0.36, "learning_rate": 0.0006474081206868881, "loss": 0.0725, "theoretical_loss": 3.620118417028754, "tokens_seen": 1184890880 }, { "epoch": 0.36, "learning_rate": 0.0006473278767453056, "loss": 0.0735, "theoretical_loss": 3.620045159397403, "tokens_seen": 1185153024 }, { "epoch": 0.36, "learning_rate": 0.0006472476328037234, "loss": 0.0741, "theoretical_loss": 3.619971922504052, "tokens_seen": 1185415168 }, { "epoch": 0.36, "learning_rate": 0.0006471673888621409, "loss": 0.0711, "theoretical_loss": 3.6198987063382457, "tokens_seen": 1185677312 }, { "epoch": 0.36, "learning_rate": 0.0006470871449205585, "loss": 0.0757, "theoretical_loss": 3.6198255108895374, "tokens_seen": 1185939456 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0009745230199769139, "objective/train/docs_used": 434243, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5072544813156128, "objective/train/original_loss": 1.5072546005249023, "objective/train/theoretical_loss": 3.6197523361474877, "objective/train/tokens_used": 1206661600, "objective/train/value_avg": -0.00830841064453125, "objective/train/value_loss": 0.00030490392236970365, "objective/train/value_max": -9.763240814208984e-05, "objective/train/value_min": -0.658203125, "objective/train/value_reward_corr": 0.6350907920593141, "objective/train/value_std": 0.0146636962890625, "objective/train/weight_avg": 1.0011084079742432, "objective/train/weighted_lm_loss": 1.509034514427185, "objective/train/weights_max": 1.5940988063812256, "objective/train/weights_min": 0.37102511525154114, "theoretical_loss": 3.6197523361474877, "tokens_seen": 1186201600 }, { "epoch": 0.36, "learning_rate": 0.0006470069009789761, "loss": 0.0753, "theoretical_loss": 3.6197523361474877, "tokens_seen": 1186201600 }, { "epoch": 0.36, "learning_rate": 0.0006469266570373937, "loss": 0.0723, "theoretical_loss": 3.619679182101664, "tokens_seen": 1186463744 }, { "epoch": 0.36, "learning_rate": 0.0006468464130958113, "loss": 0.0766, "theoretical_loss": 3.619606048741643, "tokens_seen": 1186725888 }, { "epoch": 0.36, "learning_rate": 0.0006467661691542289, "loss": 0.0751, "theoretical_loss": 3.6195329360570065, "tokens_seen": 1186988032 }, { "epoch": 0.36, "learning_rate": 0.0006466859252126464, "loss": 0.0743, "theoretical_loss": 3.6194598440373467, "tokens_seen": 1187250176 }, { "epoch": 0.36, "learning_rate": 0.000646605681271064, "loss": 0.072, "theoretical_loss": 3.619386772672261, "tokens_seen": 1187512320 }, { "epoch": 0.36, "learning_rate": 0.0006465254373294817, "loss": 0.0766, "theoretical_loss": 3.6193137219513556, "tokens_seen": 1187774464 }, { "epoch": 0.36, "learning_rate": 0.0006464451933878992, "loss": 0.0759, "theoretical_loss": 3.619240691864243, "tokens_seen": 1188036608 }, { "epoch": 0.36, "learning_rate": 0.0006463649494463169, "loss": 0.0709, "theoretical_loss": 3.619167682400545, "tokens_seen": 1188298752 }, { "epoch": 0.36, "learning_rate": 0.0006462847055047344, "loss": 0.0769, "theoretical_loss": 3.619094693549889, "tokens_seen": 1188560896 }, { "epoch": 0.36, "learning_rate": 0.000646204461563152, "loss": 0.0729, "theoretical_loss": 3.6190217253019124, "tokens_seen": 1188823040 }, { "epoch": 0.36, "learning_rate": 0.0006461242176215696, "loss": 0.0747, "theoretical_loss": 3.6189487776462568, "tokens_seen": 1189085184 }, { "epoch": 0.36, "learning_rate": 0.0006460439736799871, "loss": 0.0706, "theoretical_loss": 3.6188758505725738, "tokens_seen": 1189347328 }, { "epoch": 0.36, "learning_rate": 0.0006459637297384047, "loss": 0.0767, "theoretical_loss": 3.618802944070522, "tokens_seen": 1189609472 }, { "epoch": 0.36, "learning_rate": 0.0006458834857968223, "loss": 0.0741, "theoretical_loss": 3.618730058129766, "tokens_seen": 1189871616 }, { "epoch": 0.36, "learning_rate": 0.00064580324185524, "loss": 0.0745, "theoretical_loss": 3.61865719273998, "tokens_seen": 1190133760 }, { "epoch": 0.36, "learning_rate": 0.0006457229979136575, "loss": 0.0736, "theoretical_loss": 3.6185843478908453, "tokens_seen": 1190395904 }, { "epoch": 0.36, "learning_rate": 0.0006456427539720752, "loss": 0.0736, "theoretical_loss": 3.618511523572049, "tokens_seen": 1190658048 }, { "epoch": 0.36, "learning_rate": 0.0006455625100304927, "loss": 0.0728, "theoretical_loss": 3.6184387197732875, "tokens_seen": 1190920192 }, { "epoch": 0.36, "learning_rate": 0.0006454822660889103, "loss": 0.0775, "theoretical_loss": 3.6183659364842624, "tokens_seen": 1191182336 }, { "epoch": 0.36, "learning_rate": 0.0006454020221473279, "loss": 0.0742, "theoretical_loss": 3.6182931736946857, "tokens_seen": 1191444480 }, { "epoch": 0.36, "learning_rate": 0.0006453217782057454, "loss": 0.0723, "theoretical_loss": 3.618220431394274, "tokens_seen": 1191706624 }, { "epoch": 0.36, "learning_rate": 0.0006452415342641631, "loss": 0.0704, "theoretical_loss": 3.618147709572754, "tokens_seen": 1191968768 }, { "epoch": 0.36, "learning_rate": 0.0006451612903225806, "loss": 0.0738, "theoretical_loss": 3.618075008219858, "tokens_seen": 1192230912 }, { "epoch": 0.36, "learning_rate": 0.0006450810463809983, "loss": 0.0715, "theoretical_loss": 3.6180023273253252, "tokens_seen": 1192493056 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.001384525210596621, "objective/train/docs_used": 436657, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4595648050308228, "objective/train/original_loss": 1.4595648050308228, "objective/train/theoretical_loss": 3.6179296668789043, "objective/train/tokens_used": 1213215200, "objective/train/value_avg": -0.01062774658203125, "objective/train/value_loss": 0.00031874870182946324, "objective/train/value_max": -0.00014317035675048828, "objective/train/value_min": -0.7646484375, "objective/train/value_reward_corr": 0.6390448707495002, "objective/train/value_std": 0.0160675048828125, "objective/train/weight_avg": 1.0015238523483276, "objective/train/weighted_lm_loss": 1.4609448909759521, "objective/train/weights_max": 1.3013832569122314, "objective/train/weights_min": 0.3687829375267029, "theoretical_loss": 3.6179296668789043, "tokens_seen": 1192755200 }, { "epoch": 0.36, "learning_rate": 0.0006450008024394159, "loss": 0.0753, "theoretical_loss": 3.6179296668789043, "tokens_seen": 1192755200 }, { "epoch": 0.36, "learning_rate": 0.0006449205584978334, "loss": 0.0736, "theoretical_loss": 3.6178570268703494, "tokens_seen": 1193017344 }, { "epoch": 0.36, "learning_rate": 0.000644840314556251, "loss": 0.0733, "theoretical_loss": 3.617784407289424, "tokens_seen": 1193279488 }, { "epoch": 0.36, "learning_rate": 0.0006447600706146686, "loss": 0.0747, "theoretical_loss": 3.617711808125896, "tokens_seen": 1193541632 }, { "epoch": 0.36, "learning_rate": 0.0006446798266730862, "loss": 0.0751, "theoretical_loss": 3.6176392293695434, "tokens_seen": 1193803776 }, { "epoch": 0.36, "learning_rate": 0.0006445995827315037, "loss": 0.0753, "theoretical_loss": 3.6175666710101506, "tokens_seen": 1194065920 }, { "epoch": 0.36, "learning_rate": 0.0006445193387899214, "loss": 0.0733, "theoretical_loss": 3.6174941330375097, "tokens_seen": 1194328064 }, { "epoch": 0.36, "learning_rate": 0.0006444390948483389, "loss": 0.0753, "theoretical_loss": 3.617421615441419, "tokens_seen": 1194590208 }, { "epoch": 0.36, "learning_rate": 0.0006443588509067564, "loss": 0.0777, "theoretical_loss": 3.617349118211685, "tokens_seen": 1194852352 }, { "epoch": 0.36, "learning_rate": 0.0006442786069651742, "loss": 0.0731, "theoretical_loss": 3.6172766413381225, "tokens_seen": 1195114496 }, { "epoch": 0.36, "learning_rate": 0.0006441983630235917, "loss": 0.0704, "theoretical_loss": 3.617204184810552, "tokens_seen": 1195376640 }, { "epoch": 0.36, "learning_rate": 0.0006441181190820094, "loss": 0.074, "theoretical_loss": 3.6171317486188013, "tokens_seen": 1195638784 }, { "epoch": 0.36, "learning_rate": 0.0006440378751404269, "loss": 0.0744, "theoretical_loss": 3.6170593327527074, "tokens_seen": 1195900928 }, { "epoch": 0.36, "learning_rate": 0.0006439576311988445, "loss": 0.0725, "theoretical_loss": 3.616986937202112, "tokens_seen": 1196163072 }, { "epoch": 0.36, "learning_rate": 0.0006438773872572621, "loss": 0.0745, "theoretical_loss": 3.616914561956867, "tokens_seen": 1196425216 }, { "epoch": 0.36, "learning_rate": 0.0006437971433156797, "loss": 0.0744, "theoretical_loss": 3.6168422070068287, "tokens_seen": 1196687360 }, { "epoch": 0.36, "learning_rate": 0.0006437168993740972, "loss": 0.0729, "theoretical_loss": 3.6167698723418624, "tokens_seen": 1196949504 }, { "epoch": 0.36, "learning_rate": 0.0006436366554325149, "loss": 0.076, "theoretical_loss": 3.6166975579518406, "tokens_seen": 1197211648 }, { "epoch": 0.36, "learning_rate": 0.0006435564114909325, "loss": 0.0729, "theoretical_loss": 3.6166252638266423, "tokens_seen": 1197473792 }, { "epoch": 0.36, "learning_rate": 0.00064347616754935, "loss": 0.0746, "theoretical_loss": 3.616552989956155, "tokens_seen": 1197735936 }, { "epoch": 0.36, "learning_rate": 0.0006433959236077677, "loss": 0.0745, "theoretical_loss": 3.616480736330272, "tokens_seen": 1197998080 }, { "epoch": 0.36, "learning_rate": 0.0006433156796661852, "loss": 0.0709, "theoretical_loss": 3.616408502938895, "tokens_seen": 1198260224 }, { "epoch": 0.36, "learning_rate": 0.0006432354357246028, "loss": 0.07, "theoretical_loss": 3.616336289771932, "tokens_seen": 1198522368 }, { "epoch": 0.36, "learning_rate": 0.0006431551917830204, "loss": 0.0712, "theoretical_loss": 3.6162640968192994, "tokens_seen": 1198784512 }, { "epoch": 0.36, "learning_rate": 0.0006430749478414379, "loss": 0.0726, "theoretical_loss": 3.61619192407092, "tokens_seen": 1199046656 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0016890951665118337, "objective/train/docs_used": 438488, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4938664436340332, "objective/train/original_loss": 1.493866205215454, "objective/train/theoretical_loss": 3.6161197715167237, "objective/train/tokens_used": 1219768800, "objective/train/value_avg": -0.00862884521484375, "objective/train/value_loss": 0.0003480366722214967, "objective/train/value_max": -0.00011146068572998047, "objective/train/value_min": -0.95068359375, "objective/train/value_reward_corr": 0.7120363623901087, "objective/train/value_std": 0.0188140869140625, "objective/train/weight_avg": 1.0018378496170044, "objective/train/weighted_lm_loss": 1.4969854354858398, "objective/train/weights_max": 1.3778945207595825, "objective/train/weights_min": 0.22749631106853485, "theoretical_loss": 3.6161197715167237, "tokens_seen": 1199308800 }, { "epoch": 0.36, "learning_rate": 0.0006429947038998556, "loss": 0.072, "theoretical_loss": 3.6161197715167237, "tokens_seen": 1199308800 }, { "epoch": 0.36, "learning_rate": 0.0006429144599582731, "loss": 0.0718, "theoretical_loss": 3.616047639146648, "tokens_seen": 1199570944 }, { "epoch": 0.36, "learning_rate": 0.0006428342160166908, "loss": 0.0716, "theoretical_loss": 3.6159755269506375, "tokens_seen": 1199833088 }, { "epoch": 0.36, "learning_rate": 0.0006427539720751084, "loss": 0.0734, "theoretical_loss": 3.6159034349186445, "tokens_seen": 1200095232 }, { "epoch": 0.36, "learning_rate": 0.000642673728133526, "loss": 0.0759, "theoretical_loss": 3.615831363040628, "tokens_seen": 1200357376 }, { "epoch": 0.36, "learning_rate": 0.0006425934841919435, "loss": 0.0709, "theoretical_loss": 3.615759311306553, "tokens_seen": 1200619520 }, { "epoch": 0.36, "learning_rate": 0.0006425132402503611, "loss": 0.0715, "theoretical_loss": 3.6156872797063944, "tokens_seen": 1200881664 }, { "epoch": 0.36, "learning_rate": 0.0006424329963087787, "loss": 0.0745, "theoretical_loss": 3.6156152682301324, "tokens_seen": 1201143808 }, { "epoch": 0.36, "learning_rate": 0.0006423527523671962, "loss": 0.0752, "theoretical_loss": 3.6155432768677542, "tokens_seen": 1201405952 }, { "epoch": 0.36, "learning_rate": 0.0006422725084256139, "loss": 0.0726, "theoretical_loss": 3.615471305609255, "tokens_seen": 1201668096 }, { "epoch": 0.36, "learning_rate": 0.0006421922644840314, "loss": 0.0736, "theoretical_loss": 3.6153993544446372, "tokens_seen": 1201930240 }, { "epoch": 0.36, "learning_rate": 0.0006421120205424491, "loss": 0.073, "theoretical_loss": 3.61532742336391, "tokens_seen": 1202192384 }, { "epoch": 0.36, "learning_rate": 0.0006420317766008667, "loss": 0.0709, "theoretical_loss": 3.6152555123570895, "tokens_seen": 1202454528 }, { "epoch": 0.36, "learning_rate": 0.0006419515326592842, "loss": 0.0685, "theoretical_loss": 3.6151836214141992, "tokens_seen": 1202716672 }, { "epoch": 0.36, "learning_rate": 0.0006418712887177018, "loss": 0.0737, "theoretical_loss": 3.61511175052527, "tokens_seen": 1202978816 }, { "epoch": 0.36, "learning_rate": 0.0006417910447761194, "loss": 0.0711, "theoretical_loss": 3.6150398996803395, "tokens_seen": 1203240960 }, { "epoch": 0.36, "learning_rate": 0.000641710800834537, "loss": 0.0732, "theoretical_loss": 3.6149680688694525, "tokens_seen": 1203503104 }, { "epoch": 0.36, "learning_rate": 0.0006416305568929546, "loss": 0.0737, "theoretical_loss": 3.614896258082661, "tokens_seen": 1203765248 }, { "epoch": 0.36, "learning_rate": 0.0006415503129513722, "loss": 0.071, "theoretical_loss": 3.614824467310025, "tokens_seen": 1204027392 }, { "epoch": 0.36, "learning_rate": 0.0006414700690097897, "loss": 0.0743, "theoretical_loss": 3.614752696541609, "tokens_seen": 1204289536 }, { "epoch": 0.37, "learning_rate": 0.0006413898250682074, "loss": 0.0732, "theoretical_loss": 3.614680945767488, "tokens_seen": 1204551680 }, { "epoch": 0.37, "learning_rate": 0.000641309581126625, "loss": 0.0728, "theoretical_loss": 3.614609214977741, "tokens_seen": 1204813824 }, { "epoch": 0.37, "learning_rate": 0.0006412293371850425, "loss": 0.0751, "theoretical_loss": 3.614537504162457, "tokens_seen": 1205075968 }, { "epoch": 0.37, "learning_rate": 0.0006411490932434602, "loss": 0.0736, "theoretical_loss": 3.614465813311729, "tokens_seen": 1205338112 }, { "epoch": 0.37, "learning_rate": 0.0006410688493018777, "loss": 0.0736, "theoretical_loss": 3.6143941424156596, "tokens_seen": 1205600256 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0002445022400934249, "objective/train/docs_used": 441116, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4677330255508423, "objective/train/original_loss": 1.4677330255508423, "objective/train/theoretical_loss": 3.614322491464357, "objective/train/tokens_used": 1226322400, "objective/train/value_avg": -0.01202392578125, "objective/train/value_loss": 0.0005131938378326595, "objective/train/value_max": -9.608268737792969e-05, "objective/train/value_min": -0.91015625, "objective/train/value_reward_corr": 0.7615977591679253, "objective/train/value_std": 0.025634765625, "objective/train/weight_avg": 1.0004706382751465, "objective/train/weighted_lm_loss": 1.4688076972961426, "objective/train/weights_max": 1.7541979551315308, "objective/train/weights_min": 0.3682364523410797, "theoretical_loss": 3.614322491464357, "tokens_seen": 1205862400 }, { "epoch": 0.37, "learning_rate": 0.0006409886053602953, "loss": 0.0725, "theoretical_loss": 3.614322491464357, "tokens_seen": 1205862400 }, { "epoch": 0.37, "learning_rate": 0.0006409083614187129, "loss": 0.0761, "theoretical_loss": 3.614250860447936, "tokens_seen": 1206124544 }, { "epoch": 0.37, "learning_rate": 0.0006408281174771304, "loss": 0.0747, "theoretical_loss": 3.6141792493565212, "tokens_seen": 1206386688 }, { "epoch": 0.37, "learning_rate": 0.000640747873535548, "loss": 0.071, "theoretical_loss": 3.6141076581802416, "tokens_seen": 1206648832 }, { "epoch": 0.37, "learning_rate": 0.0006406676295939656, "loss": 0.0718, "theoretical_loss": 3.614036086909234, "tokens_seen": 1206910976 }, { "epoch": 0.37, "learning_rate": 0.0006405873856523833, "loss": 0.0748, "theoretical_loss": 3.6139645355336425, "tokens_seen": 1207173120 }, { "epoch": 0.37, "learning_rate": 0.0006405071417108009, "loss": 0.076, "theoretical_loss": 3.613893004043617, "tokens_seen": 1207435264 }, { "epoch": 0.37, "learning_rate": 0.0006404268977692185, "loss": 0.0723, "theoretical_loss": 3.6138214924293166, "tokens_seen": 1207697408 }, { "epoch": 0.37, "learning_rate": 0.000640346653827636, "loss": 0.0771, "theoretical_loss": 3.6137500006809056, "tokens_seen": 1207959552 }, { "epoch": 0.37, "learning_rate": 0.0006402664098860537, "loss": 0.074, "theoretical_loss": 3.6136785287885553, "tokens_seen": 1208221696 }, { "epoch": 0.37, "learning_rate": 0.0006401861659444712, "loss": 0.0736, "theoretical_loss": 3.6136070767424457, "tokens_seen": 1208483840 }, { "epoch": 0.37, "learning_rate": 0.0006401059220028887, "loss": 0.0746, "theoretical_loss": 3.6135356445327624, "tokens_seen": 1208745984 }, { "epoch": 0.37, "learning_rate": 0.0006400256780613064, "loss": 0.0692, "theoretical_loss": 3.6134642321496977, "tokens_seen": 1209008128 }, { "epoch": 0.37, "learning_rate": 0.000639945434119724, "loss": 0.0706, "theoretical_loss": 3.613392839583452, "tokens_seen": 1209270272 }, { "epoch": 0.37, "learning_rate": 0.0006398651901781416, "loss": 0.0741, "theoretical_loss": 3.6133214668242317, "tokens_seen": 1209532416 }, { "epoch": 0.37, "learning_rate": 0.0006397849462365592, "loss": 0.0709, "theoretical_loss": 3.613250113862251, "tokens_seen": 1209794560 }, { "epoch": 0.37, "learning_rate": 0.0006397047022949768, "loss": 0.0724, "theoretical_loss": 3.61317878068773, "tokens_seen": 1210056704 }, { "epoch": 0.37, "learning_rate": 0.0006396244583533943, "loss": 0.0715, "theoretical_loss": 3.6131074672908965, "tokens_seen": 1210318848 }, { "epoch": 0.37, "learning_rate": 0.0006395442144118119, "loss": 0.0715, "theoretical_loss": 3.6130361736619854, "tokens_seen": 1210580992 }, { "epoch": 0.37, "learning_rate": 0.0006394639704702295, "loss": 0.0739, "theoretical_loss": 3.6129648997912382, "tokens_seen": 1210843136 }, { "epoch": 0.37, "learning_rate": 0.000639383726528647, "loss": 0.0723, "theoretical_loss": 3.6128936456689034, "tokens_seen": 1211105280 }, { "epoch": 0.37, "learning_rate": 0.0006393034825870647, "loss": 0.0729, "theoretical_loss": 3.612822411285236, "tokens_seen": 1211367424 }, { "epoch": 0.37, "learning_rate": 0.0006392232386454822, "loss": 0.0755, "theoretical_loss": 3.612751196630499, "tokens_seen": 1211629568 }, { "epoch": 0.37, "learning_rate": 0.0006391429947039, "loss": 0.0725, "theoretical_loss": 3.61268000169496, "tokens_seen": 1211891712 }, { "epoch": 0.37, "learning_rate": 0.0006390627507623175, "loss": 0.0742, "theoretical_loss": 3.612608826468897, "tokens_seen": 1212153856 }, { "epoch": 0.37, "objective/train/advantage_avg": -0.0005174296093173325, "objective/train/docs_used": 443456, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.490218162536621, "objective/train/original_loss": 1.490218162536621, "objective/train/theoretical_loss": 3.6125376709425923, "objective/train/tokens_used": 1232876000, "objective/train/value_avg": -0.008087158203125, "objective/train/value_loss": 0.00029237871058285236, "objective/train/value_max": -7.599592208862305e-05, "objective/train/value_min": -0.3349609375, "objective/train/value_reward_corr": 0.6280084039572571, "objective/train/value_std": 0.01226806640625, "objective/train/weight_avg": 0.999613881111145, "objective/train/weighted_lm_loss": 1.4892261028289795, "objective/train/weights_max": 1.3555288314819336, "objective/train/weights_min": 0.3726279139518738, "theoretical_loss": 3.6125376709425923, "tokens_seen": 1212416000 }, { "epoch": 0.37, "learning_rate": 0.000638982506820735, "loss": 0.0738, "theoretical_loss": 3.6125376709425923, "tokens_seen": 1212416000 }, { "epoch": 0.37, "learning_rate": 0.0006389022628791527, "loss": 0.0683, "theoretical_loss": 3.6124665351063356, "tokens_seen": 1212678144 }, { "epoch": 0.37, "learning_rate": 0.0006388220189375702, "loss": 0.0737, "theoretical_loss": 3.612395418950424, "tokens_seen": 1212940288 }, { "epoch": 0.37, "learning_rate": 0.0006387417749959878, "loss": 0.0741, "theoretical_loss": 3.6123243224651604, "tokens_seen": 1213202432 }, { "epoch": 0.37, "learning_rate": 0.0006386615310544054, "loss": 0.0719, "theoretical_loss": 3.612253245640856, "tokens_seen": 1213464576 }, { "epoch": 0.37, "learning_rate": 0.000638581287112823, "loss": 0.0716, "theoretical_loss": 3.6121821884678287, "tokens_seen": 1213726720 }, { "epoch": 0.37, "learning_rate": 0.0006385010431712405, "loss": 0.0742, "theoretical_loss": 3.6121111509364017, "tokens_seen": 1213988864 }, { "epoch": 0.37, "learning_rate": 0.0006384207992296582, "loss": 0.0751, "theoretical_loss": 3.6120401330369067, "tokens_seen": 1214251008 }, { "epoch": 0.37, "learning_rate": 0.0006383405552880758, "loss": 0.0718, "theoretical_loss": 3.6119691347596814, "tokens_seen": 1214513152 }, { "epoch": 0.37, "learning_rate": 0.0006382603113464933, "loss": 0.0739, "theoretical_loss": 3.611898156095071, "tokens_seen": 1214775296 }, { "epoch": 0.37, "learning_rate": 0.000638180067404911, "loss": 0.0736, "theoretical_loss": 3.6118271970334264, "tokens_seen": 1215037440 }, { "epoch": 0.37, "learning_rate": 0.0006380998234633285, "loss": 0.0722, "theoretical_loss": 3.6117562575651068, "tokens_seen": 1215299584 }, { "epoch": 0.37, "learning_rate": 0.0006380195795217462, "loss": 0.0728, "theoretical_loss": 3.611685337680477, "tokens_seen": 1215561728 }, { "epoch": 0.37, "learning_rate": 0.0006379393355801637, "loss": 0.072, "theoretical_loss": 3.6116144373699086, "tokens_seen": 1215823872 }, { "epoch": 0.37, "learning_rate": 0.0006378590916385812, "loss": 0.0729, "theoretical_loss": 3.611543556623782, "tokens_seen": 1216086016 }, { "epoch": 0.37, "learning_rate": 0.0006377788476969989, "loss": 0.0728, "theoretical_loss": 3.6114726954324814, "tokens_seen": 1216348160 }, { "epoch": 0.37, "learning_rate": 0.0006376986037554164, "loss": 0.0723, "theoretical_loss": 3.6114018537864, "tokens_seen": 1216610304 }, { "epoch": 0.37, "learning_rate": 0.0006376183598138341, "loss": 0.0724, "theoretical_loss": 3.6113310316759373, "tokens_seen": 1216872448 }, { "epoch": 0.37, "learning_rate": 0.0006375381158722517, "loss": 0.0691, "theoretical_loss": 3.6112602290914984, "tokens_seen": 1217134592 }, { "epoch": 0.37, "learning_rate": 0.0006374578719306693, "loss": 0.0716, "theoretical_loss": 3.611189446023497, "tokens_seen": 1217396736 }, { "epoch": 0.37, "learning_rate": 0.0006373776279890868, "loss": 0.0725, "theoretical_loss": 3.6111186824623527, "tokens_seen": 1217658880 }, { "epoch": 0.37, "learning_rate": 0.0006372973840475045, "loss": 0.0737, "theoretical_loss": 3.6110479383984915, "tokens_seen": 1217921024 }, { "epoch": 0.37, "learning_rate": 0.000637217140105922, "loss": 0.0729, "theoretical_loss": 3.6109772138223466, "tokens_seen": 1218183168 }, { "epoch": 0.37, "learning_rate": 0.0006371368961643395, "loss": 0.0735, "theoretical_loss": 3.6109065087243577, "tokens_seen": 1218445312 }, { "epoch": 0.37, "learning_rate": 0.0006370566522227572, "loss": 0.0715, "theoretical_loss": 3.610835823094972, "tokens_seen": 1218707456 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0005563150043599308, "objective/train/docs_used": 445814, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4974902868270874, "objective/train/original_loss": 1.4974902868270874, "objective/train/theoretical_loss": 3.6107651569246424, "objective/train/tokens_used": 1239429600, "objective/train/value_avg": -0.0152435302734375, "objective/train/value_loss": 0.0005672244587913156, "objective/train/value_max": -0.0001055598258972168, "objective/train/value_min": -0.96044921875, "objective/train/value_reward_corr": 0.7760111785211823, "objective/train/value_std": 0.0281982421875, "objective/train/weight_avg": 1.0008221864700317, "objective/train/weighted_lm_loss": 1.4987361431121826, "objective/train/weights_max": 1.8249680995941162, "objective/train/weights_min": 0.3728724420070648, "theoretical_loss": 3.6107651569246424, "tokens_seen": 1218969600 }, { "epoch": 0.37, "learning_rate": 0.0006369764082811747, "loss": 0.073, "theoretical_loss": 3.6107651569246424, "tokens_seen": 1218969600 }, { "epoch": 0.37, "learning_rate": 0.0006368961643395924, "loss": 0.0712, "theoretical_loss": 3.610694510203829, "tokens_seen": 1219231744 }, { "epoch": 0.37, "learning_rate": 0.00063681592039801, "loss": 0.0716, "theoretical_loss": 3.610623882922999, "tokens_seen": 1219493888 }, { "epoch": 0.37, "learning_rate": 0.0006367356764564276, "loss": 0.0716, "theoretical_loss": 3.6105532750726255, "tokens_seen": 1219756032 }, { "epoch": 0.37, "learning_rate": 0.0006366554325148452, "loss": 0.0717, "theoretical_loss": 3.6104826866431887, "tokens_seen": 1220018176 }, { "epoch": 0.37, "learning_rate": 0.0006365751885732627, "loss": 0.0717, "theoretical_loss": 3.6104121176251764, "tokens_seen": 1220280320 }, { "epoch": 0.37, "learning_rate": 0.0006364949446316803, "loss": 0.0723, "theoretical_loss": 3.6103415680090816, "tokens_seen": 1220542464 }, { "epoch": 0.37, "learning_rate": 0.0006364147006900979, "loss": 0.0739, "theoretical_loss": 3.6102710377854046, "tokens_seen": 1220804608 }, { "epoch": 0.37, "learning_rate": 0.0006363344567485155, "loss": 0.0704, "theoretical_loss": 3.610200526944652, "tokens_seen": 1221066752 }, { "epoch": 0.37, "learning_rate": 0.000636254212806933, "loss": 0.0721, "theoretical_loss": 3.610130035477339, "tokens_seen": 1221328896 }, { "epoch": 0.37, "learning_rate": 0.0006361739688653508, "loss": 0.0752, "theoretical_loss": 3.6100595633739854, "tokens_seen": 1221591040 }, { "epoch": 0.37, "learning_rate": 0.0006360937249237683, "loss": 0.0727, "theoretical_loss": 3.6099891106251176, "tokens_seen": 1221853184 }, { "epoch": 0.37, "learning_rate": 0.0006360134809821858, "loss": 0.0746, "theoretical_loss": 3.6099186772212697, "tokens_seen": 1222115328 }, { "epoch": 0.37, "learning_rate": 0.0006359332370406035, "loss": 0.0709, "theoretical_loss": 3.6098482631529825, "tokens_seen": 1222377472 }, { "epoch": 0.37, "learning_rate": 0.000635852993099021, "loss": 0.0727, "theoretical_loss": 3.6097778684108026, "tokens_seen": 1222639616 }, { "epoch": 0.37, "learning_rate": 0.0006357727491574386, "loss": 0.0716, "theoretical_loss": 3.609707492985284, "tokens_seen": 1222901760 }, { "epoch": 0.37, "learning_rate": 0.0006356925052158562, "loss": 0.0714, "theoretical_loss": 3.609637136866987, "tokens_seen": 1223163904 }, { "epoch": 0.37, "learning_rate": 0.0006356122612742738, "loss": 0.0697, "theoretical_loss": 3.609566800046478, "tokens_seen": 1223426048 }, { "epoch": 0.37, "learning_rate": 0.0006355320173326913, "loss": 0.0711, "theoretical_loss": 3.609496482514332, "tokens_seen": 1223688192 }, { "epoch": 0.37, "learning_rate": 0.000635451773391109, "loss": 0.0732, "theoretical_loss": 3.609426184261128, "tokens_seen": 1223950336 }, { "epoch": 0.37, "learning_rate": 0.0006353715294495266, "loss": 0.0735, "theoretical_loss": 3.6093559052774538, "tokens_seen": 1224212480 }, { "epoch": 0.37, "learning_rate": 0.0006352912855079442, "loss": 0.0715, "theoretical_loss": 3.6092856455539017, "tokens_seen": 1224474624 }, { "epoch": 0.37, "learning_rate": 0.0006352110415663618, "loss": 0.0745, "theoretical_loss": 3.6092154050810725, "tokens_seen": 1224736768 }, { "epoch": 0.37, "learning_rate": 0.0006351307976247793, "loss": 0.073, "theoretical_loss": 3.6091451838495727, "tokens_seen": 1224998912 }, { "epoch": 0.37, "learning_rate": 0.000635050553683197, "loss": 0.0714, "theoretical_loss": 3.609074981850016, "tokens_seen": 1225261056 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.000483504292787984, "objective/train/docs_used": 448161, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4086085557937622, "objective/train/original_loss": 1.4086084365844727, "objective/train/theoretical_loss": 3.6090047990730216, "objective/train/tokens_used": 1245983200, "objective/train/value_avg": -0.007053375244140625, "objective/train/value_loss": 0.0002645938075147569, "objective/train/value_max": -8.285045623779297e-05, "objective/train/value_min": -0.9697265625, "objective/train/value_reward_corr": 0.7658859999331806, "objective/train/value_std": 0.0177764892578125, "objective/train/weight_avg": 1.0006027221679688, "objective/train/weighted_lm_loss": 1.4089957475662231, "objective/train/weights_max": 1.5008656978607178, "objective/train/weights_min": 0.3695588707923889, "theoretical_loss": 3.6090047990730216, "tokens_seen": 1225523200 }, { "epoch": 0.37, "learning_rate": 0.0006349703097416145, "loss": 0.0747, "theoretical_loss": 3.6090047990730216, "tokens_seen": 1225523200 }, { "epoch": 0.37, "learning_rate": 0.000634890065800032, "loss": 0.0711, "theoretical_loss": 3.6089346355092164, "tokens_seen": 1225785344 }, { "epoch": 0.37, "learning_rate": 0.0006348098218584497, "loss": 0.0737, "theoretical_loss": 3.6088644911492334, "tokens_seen": 1226047488 }, { "epoch": 0.37, "learning_rate": 0.0006347295779168672, "loss": 0.0731, "theoretical_loss": 3.6087943659837114, "tokens_seen": 1226309632 }, { "epoch": 0.37, "learning_rate": 0.0006346493339752849, "loss": 0.0708, "theoretical_loss": 3.6087242600032976, "tokens_seen": 1226571776 }, { "epoch": 0.37, "learning_rate": 0.0006345690900337025, "loss": 0.0721, "theoretical_loss": 3.6086541731986443, "tokens_seen": 1226833920 }, { "epoch": 0.37, "learning_rate": 0.0006344888460921201, "loss": 0.0685, "theoretical_loss": 3.6085841055604106, "tokens_seen": 1227096064 }, { "epoch": 0.37, "learning_rate": 0.0006344086021505376, "loss": 0.073, "theoretical_loss": 3.608514057079262, "tokens_seen": 1227358208 }, { "epoch": 0.37, "learning_rate": 0.0006343283582089553, "loss": 0.0712, "theoretical_loss": 3.6084440277458714, "tokens_seen": 1227620352 }, { "epoch": 0.37, "learning_rate": 0.0006342481142673728, "loss": 0.0702, "theoretical_loss": 3.6083740175509176, "tokens_seen": 1227882496 }, { "epoch": 0.37, "learning_rate": 0.0006341678703257904, "loss": 0.0749, "theoretical_loss": 3.6083040264850856, "tokens_seen": 1228144640 }, { "epoch": 0.37, "learning_rate": 0.000634087626384208, "loss": 0.0711, "theoretical_loss": 3.6082340545390674, "tokens_seen": 1228406784 }, { "epoch": 0.37, "learning_rate": 0.0006340073824426255, "loss": 0.0732, "theoretical_loss": 3.6081641017035615, "tokens_seen": 1228668928 }, { "epoch": 0.37, "learning_rate": 0.0006339271385010433, "loss": 0.0748, "theoretical_loss": 3.608094167969273, "tokens_seen": 1228931072 }, { "epoch": 0.37, "learning_rate": 0.0006338468945594608, "loss": 0.0717, "theoretical_loss": 3.6080242533269136, "tokens_seen": 1229193216 }, { "epoch": 0.37, "learning_rate": 0.0006337666506178784, "loss": 0.0701, "theoretical_loss": 3.6079543577672, "tokens_seen": 1229455360 }, { "epoch": 0.37, "learning_rate": 0.000633686406676296, "loss": 0.074, "theoretical_loss": 3.6078844812808577, "tokens_seen": 1229717504 }, { "epoch": 0.37, "learning_rate": 0.0006336061627347135, "loss": 0.0758, "theoretical_loss": 3.6078146238586175, "tokens_seen": 1229979648 }, { "epoch": 0.37, "learning_rate": 0.0006335259187931311, "loss": 0.0737, "theoretical_loss": 3.6077447854912164, "tokens_seen": 1230241792 }, { "epoch": 0.37, "learning_rate": 0.0006334456748515487, "loss": 0.0721, "theoretical_loss": 3.6076749661693985, "tokens_seen": 1230503936 }, { "epoch": 0.37, "learning_rate": 0.0006333654309099663, "loss": 0.0742, "theoretical_loss": 3.607605165883914, "tokens_seen": 1230766080 }, { "epoch": 0.37, "learning_rate": 0.0006332851869683838, "loss": 0.0728, "theoretical_loss": 3.60753538462552, "tokens_seen": 1231028224 }, { "epoch": 0.37, "learning_rate": 0.0006332049430268016, "loss": 0.0718, "theoretical_loss": 3.607465622384979, "tokens_seen": 1231290368 }, { "epoch": 0.37, "learning_rate": 0.0006331246990852191, "loss": 0.0731, "theoretical_loss": 3.6073958791530614, "tokens_seen": 1231552512 }, { "epoch": 0.37, "learning_rate": 0.0006330444551436366, "loss": 0.0727, "theoretical_loss": 3.607326154920543, "tokens_seen": 1231814656 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0014651658711954951, "objective/train/docs_used": 450737, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4861600399017334, "objective/train/original_loss": 1.4861600399017334, "objective/train/theoretical_loss": 3.6072564496782062, "objective/train/tokens_used": 1252536800, "objective/train/value_avg": -0.01021575927734375, "objective/train/value_loss": 0.0003100598114542663, "objective/train/value_max": -0.0001055598258972168, "objective/train/value_min": -0.75244140625, "objective/train/value_reward_corr": 0.6669433027871567, "objective/train/value_std": 0.0163116455078125, "objective/train/weight_avg": 1.001604437828064, "objective/train/weighted_lm_loss": 1.4885464906692505, "objective/train/weights_max": 1.9935988187789917, "objective/train/weights_min": 0.36889123916625977, "theoretical_loss": 3.6072564496782062, "tokens_seen": 1232076800 }, { "epoch": 0.37, "learning_rate": 0.0006329642112020543, "loss": 0.0716, "theoretical_loss": 3.6072564496782062, "tokens_seen": 1232076800 }, { "epoch": 0.37, "learning_rate": 0.0006328839672604718, "loss": 0.0703, "theoretical_loss": 3.607186763416841, "tokens_seen": 1232338944 }, { "epoch": 0.37, "learning_rate": 0.0006328037233188895, "loss": 0.0728, "theoretical_loss": 3.607117096127242, "tokens_seen": 1232601088 }, { "epoch": 0.37, "learning_rate": 0.000632723479377307, "loss": 0.0744, "theoretical_loss": 3.60704744780021, "tokens_seen": 1232863232 }, { "epoch": 0.37, "learning_rate": 0.0006326432354357246, "loss": 0.0721, "theoretical_loss": 3.606977818426555, "tokens_seen": 1233125376 }, { "epoch": 0.37, "learning_rate": 0.0006325629914941422, "loss": 0.0745, "theoretical_loss": 3.6069082079970913, "tokens_seen": 1233387520 }, { "epoch": 0.37, "learning_rate": 0.0006324827475525598, "loss": 0.0739, "theoretical_loss": 3.60683861650264, "tokens_seen": 1233649664 }, { "epoch": 0.37, "learning_rate": 0.0006324025036109774, "loss": 0.0708, "theoretical_loss": 3.606769043934027, "tokens_seen": 1233911808 }, { "epoch": 0.37, "learning_rate": 0.000632322259669395, "loss": 0.0736, "theoretical_loss": 3.606699490282088, "tokens_seen": 1234173952 }, { "epoch": 0.37, "learning_rate": 0.0006322420157278126, "loss": 0.0699, "theoretical_loss": 3.606629955537663, "tokens_seen": 1234436096 }, { "epoch": 0.37, "learning_rate": 0.0006321617717862301, "loss": 0.0711, "theoretical_loss": 3.606560439691598, "tokens_seen": 1234698240 }, { "epoch": 0.37, "learning_rate": 0.0006320815278446478, "loss": 0.0722, "theoretical_loss": 3.6064909427347462, "tokens_seen": 1234960384 }, { "epoch": 0.37, "learning_rate": 0.0006320012839030653, "loss": 0.0733, "theoretical_loss": 3.6064214646579673, "tokens_seen": 1235222528 }, { "epoch": 0.37, "learning_rate": 0.0006319210399614828, "loss": 0.0743, "theoretical_loss": 3.606352005452126, "tokens_seen": 1235484672 }, { "epoch": 0.37, "learning_rate": 0.0006318407960199005, "loss": 0.0728, "theoretical_loss": 3.6062825651080956, "tokens_seen": 1235746816 }, { "epoch": 0.37, "learning_rate": 0.000631760552078318, "loss": 0.0736, "theoretical_loss": 3.6062131436167544, "tokens_seen": 1236008960 }, { "epoch": 0.37, "learning_rate": 0.0006316803081367358, "loss": 0.0717, "theoretical_loss": 3.606143740968986, "tokens_seen": 1236271104 }, { "epoch": 0.37, "learning_rate": 0.0006316000641951533, "loss": 0.0749, "theoretical_loss": 3.6060743571556833, "tokens_seen": 1236533248 }, { "epoch": 0.37, "learning_rate": 0.0006315198202535709, "loss": 0.0741, "theoretical_loss": 3.606004992167742, "tokens_seen": 1236795392 }, { "epoch": 0.37, "learning_rate": 0.0006314395763119885, "loss": 0.0716, "theoretical_loss": 3.605935645996067, "tokens_seen": 1237057536 }, { "epoch": 0.37, "learning_rate": 0.000631359332370406, "loss": 0.0698, "theoretical_loss": 3.605866318631568, "tokens_seen": 1237319680 }, { "epoch": 0.38, "learning_rate": 0.0006312790884288236, "loss": 0.0712, "theoretical_loss": 3.605797010065161, "tokens_seen": 1237581824 }, { "epoch": 0.38, "learning_rate": 0.0006311988444872412, "loss": 0.0714, "theoretical_loss": 3.6057277202877698, "tokens_seen": 1237843968 }, { "epoch": 0.38, "learning_rate": 0.0006311186005456588, "loss": 0.0719, "theoretical_loss": 3.6056584492903223, "tokens_seen": 1238106112 }, { "epoch": 0.38, "learning_rate": 0.0006310383566040763, "loss": 0.0714, "theoretical_loss": 3.6055891970637544, "tokens_seen": 1238368256 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0007562974351458251, "objective/train/docs_used": 453261, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5378789901733398, "objective/train/original_loss": 1.5378789901733398, "objective/train/theoretical_loss": 3.605519963599008, "objective/train/tokens_used": 1259090400, "objective/train/value_avg": -0.009429931640625, "objective/train/value_loss": 0.0003417150001041591, "objective/train/value_max": -0.00011593103408813477, "objective/train/value_min": -0.72119140625, "objective/train/value_reward_corr": 0.7248880939979041, "objective/train/value_std": 0.0178985595703125, "objective/train/weight_avg": 1.0009098052978516, "objective/train/weighted_lm_loss": 1.539913296699524, "objective/train/weights_max": 1.2381511926651, "objective/train/weights_min": 0.3683769106864929, "theoretical_loss": 3.605519963599008, "tokens_seen": 1238630400 }, { "epoch": 0.38, "learning_rate": 0.0006309581126624941, "loss": 0.0748, "theoretical_loss": 3.605519963599008, "tokens_seen": 1238630400 }, { "epoch": 0.38, "learning_rate": 0.0006308778687209116, "loss": 0.0723, "theoretical_loss": 3.60545074888703, "tokens_seen": 1238892544 }, { "epoch": 0.38, "learning_rate": 0.0006307976247793292, "loss": 0.0714, "theoretical_loss": 3.6053815529187756, "tokens_seen": 1239154688 }, { "epoch": 0.38, "learning_rate": 0.0006307173808377468, "loss": 0.0739, "theoretical_loss": 3.6053123756852052, "tokens_seen": 1239416832 }, { "epoch": 0.38, "learning_rate": 0.0006306371368961643, "loss": 0.0709, "theoretical_loss": 3.605243217177285, "tokens_seen": 1239678976 }, { "epoch": 0.38, "learning_rate": 0.0006305568929545819, "loss": 0.0711, "theoretical_loss": 3.6051740773859877, "tokens_seen": 1239941120 }, { "epoch": 0.38, "learning_rate": 0.0006304766490129995, "loss": 0.0729, "theoretical_loss": 3.605104956302293, "tokens_seen": 1240203264 }, { "epoch": 0.38, "learning_rate": 0.0006303964050714171, "loss": 0.0728, "theoretical_loss": 3.605035853917187, "tokens_seen": 1240465408 }, { "epoch": 0.38, "learning_rate": 0.0006303161611298347, "loss": 0.0748, "theoretical_loss": 3.604966770221661, "tokens_seen": 1240727552 }, { "epoch": 0.38, "learning_rate": 0.0006302359171882524, "loss": 0.0696, "theoretical_loss": 3.604897705206713, "tokens_seen": 1240989696 }, { "epoch": 0.38, "learning_rate": 0.0006301556732466699, "loss": 0.0723, "theoretical_loss": 3.6048286588633465, "tokens_seen": 1241251840 }, { "epoch": 0.38, "learning_rate": 0.0006300754293050875, "loss": 0.0747, "theoretical_loss": 3.6047596311825725, "tokens_seen": 1241513984 }, { "epoch": 0.38, "learning_rate": 0.0006299951853635051, "loss": 0.0746, "theoretical_loss": 3.6046906221554087, "tokens_seen": 1241776128 }, { "epoch": 0.38, "learning_rate": 0.0006299149414219226, "loss": 0.0737, "theoretical_loss": 3.6046216317728765, "tokens_seen": 1242038272 }, { "epoch": 0.38, "learning_rate": 0.0006298346974803403, "loss": 0.0722, "theoretical_loss": 3.6045526600260054, "tokens_seen": 1242300416 }, { "epoch": 0.38, "learning_rate": 0.0006297544535387578, "loss": 0.0716, "theoretical_loss": 3.6044837069058318, "tokens_seen": 1242562560 }, { "epoch": 0.38, "learning_rate": 0.0006296742095971754, "loss": 0.0751, "theoretical_loss": 3.6044147724033957, "tokens_seen": 1242824704 }, { "epoch": 0.38, "learning_rate": 0.000629593965655593, "loss": 0.0752, "theoretical_loss": 3.6043458565097453, "tokens_seen": 1243086848 }, { "epoch": 0.38, "learning_rate": 0.0006295137217140106, "loss": 0.07, "theoretical_loss": 3.6042769592159356, "tokens_seen": 1243348992 }, { "epoch": 0.38, "learning_rate": 0.0006294334777724282, "loss": 0.0724, "theoretical_loss": 3.6042080805130254, "tokens_seen": 1243611136 }, { "epoch": 0.38, "learning_rate": 0.0006293532338308458, "loss": 0.0749, "theoretical_loss": 3.6041392203920815, "tokens_seen": 1243873280 }, { "epoch": 0.38, "learning_rate": 0.0006292729898892634, "loss": 0.0747, "theoretical_loss": 3.6040703788441757, "tokens_seen": 1244135424 }, { "epoch": 0.38, "learning_rate": 0.0006291927459476809, "loss": 0.0744, "theoretical_loss": 3.6040015558603877, "tokens_seen": 1244397568 }, { "epoch": 0.38, "learning_rate": 0.0006291125020060986, "loss": 0.073, "theoretical_loss": 3.603932751431802, "tokens_seen": 1244659712 }, { "epoch": 0.38, "learning_rate": 0.0006290322580645161, "loss": 0.072, "theoretical_loss": 3.6038639655495093, "tokens_seen": 1244921856 }, { "epoch": 0.38, "objective/train/advantage_avg": -0.000271944678388536, "objective/train/docs_used": 455531, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5588250160217285, "objective/train/original_loss": 1.558824896812439, "objective/train/theoretical_loss": 3.6037951982046064, "objective/train/tokens_used": 1265644000, "objective/train/value_avg": -0.009490966796875, "objective/train/value_loss": 0.000395492505049333, "objective/train/value_max": -8.153915405273438e-05, "objective/train/value_min": -0.5654296875, "objective/train/value_reward_corr": 0.6741612325784748, "objective/train/value_std": 0.01556396484375, "objective/train/weight_avg": 0.9999120831489563, "objective/train/weighted_lm_loss": 1.55759859085083, "objective/train/weights_max": 1.5399620532989502, "objective/train/weights_min": 0.3693220913410187, "theoretical_loss": 3.6037951982046064, "tokens_seen": 1245184000 }, { "epoch": 0.38, "learning_rate": 0.0006289520141229337, "loss": 0.0727, "theoretical_loss": 3.6037951982046064, "tokens_seen": 1245184000 }, { "epoch": 0.38, "learning_rate": 0.0006288717701813513, "loss": 0.0728, "theoretical_loss": 3.6037264493881973, "tokens_seen": 1245446144 }, { "epoch": 0.38, "learning_rate": 0.0006287915262397688, "loss": 0.075, "theoretical_loss": 3.603657719091391, "tokens_seen": 1245708288 }, { "epoch": 0.38, "learning_rate": 0.0006287112822981866, "loss": 0.0731, "theoretical_loss": 3.603589007305303, "tokens_seen": 1245970432 }, { "epoch": 0.38, "learning_rate": 0.0006286310383566041, "loss": 0.075, "theoretical_loss": 3.603520314021055, "tokens_seen": 1246232576 }, { "epoch": 0.38, "learning_rate": 0.0006285507944150217, "loss": 0.0718, "theoretical_loss": 3.6034516392297746, "tokens_seen": 1246494720 }, { "epoch": 0.38, "learning_rate": 0.0006284705504734393, "loss": 0.076, "theoretical_loss": 3.6033829829225965, "tokens_seen": 1246756864 }, { "epoch": 0.38, "learning_rate": 0.0006283903065318568, "loss": 0.075, "theoretical_loss": 3.6033143450906593, "tokens_seen": 1247019008 }, { "epoch": 0.38, "learning_rate": 0.0006283100625902744, "loss": 0.0732, "theoretical_loss": 3.6032457257251105, "tokens_seen": 1247281152 }, { "epoch": 0.38, "learning_rate": 0.000628229818648692, "loss": 0.0701, "theoretical_loss": 3.603177124817101, "tokens_seen": 1247543296 }, { "epoch": 0.38, "learning_rate": 0.0006281495747071096, "loss": 0.0724, "theoretical_loss": 3.603108542357791, "tokens_seen": 1247805440 }, { "epoch": 0.38, "learning_rate": 0.0006280693307655271, "loss": 0.0741, "theoretical_loss": 3.603039978338343, "tokens_seen": 1248067584 }, { "epoch": 0.38, "learning_rate": 0.0006279890868239449, "loss": 0.0714, "theoretical_loss": 3.6029714327499285, "tokens_seen": 1248329728 }, { "epoch": 0.38, "learning_rate": 0.0006279088428823624, "loss": 0.0752, "theoretical_loss": 3.602902905583724, "tokens_seen": 1248591872 }, { "epoch": 0.38, "learning_rate": 0.0006278285989407801, "loss": 0.0717, "theoretical_loss": 3.602834396830912, "tokens_seen": 1248854016 }, { "epoch": 0.38, "learning_rate": 0.0006277483549991976, "loss": 0.0725, "theoretical_loss": 3.6027659064826816, "tokens_seen": 1249116160 }, { "epoch": 0.38, "learning_rate": 0.0006276681110576151, "loss": 0.0745, "theoretical_loss": 3.602697434530227, "tokens_seen": 1249378304 }, { "epoch": 0.38, "learning_rate": 0.0006275878671160328, "loss": 0.0745, "theoretical_loss": 3.6026289809647487, "tokens_seen": 1249640448 }, { "epoch": 0.38, "learning_rate": 0.0006275076231744503, "loss": 0.0741, "theoretical_loss": 3.6025605457774548, "tokens_seen": 1249902592 }, { "epoch": 0.38, "learning_rate": 0.0006274273792328679, "loss": 0.0722, "theoretical_loss": 3.602492128959558, "tokens_seen": 1250164736 }, { "epoch": 0.38, "learning_rate": 0.0006273471352912855, "loss": 0.073, "theoretical_loss": 3.6024237305022764, "tokens_seen": 1250426880 }, { "epoch": 0.38, "learning_rate": 0.0006272668913497032, "loss": 0.0741, "theoretical_loss": 3.602355350396836, "tokens_seen": 1250689024 }, { "epoch": 0.38, "learning_rate": 0.0006271866474081207, "loss": 0.072, "theoretical_loss": 3.602286988634467, "tokens_seen": 1250951168 }, { "epoch": 0.38, "learning_rate": 0.0006271064034665383, "loss": 0.0719, "theoretical_loss": 3.6022186452064076, "tokens_seen": 1251213312 }, { "epoch": 0.38, "learning_rate": 0.0006270261595249559, "loss": 0.0717, "theoretical_loss": 3.6021503201039, "tokens_seen": 1251475456 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0007342287572100759, "objective/train/docs_used": 458046, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4588574171066284, "objective/train/original_loss": 1.4588572978973389, "objective/train/theoretical_loss": 3.6020820133181934, "objective/train/tokens_used": 1272197600, "objective/train/value_avg": -0.006072998046875, "objective/train/value_loss": 0.00018203555373474956, "objective/train/value_max": -6.157159805297852e-05, "objective/train/value_min": -0.29638671875, "objective/train/value_reward_corr": 0.6572453525608541, "objective/train/value_std": 0.0111083984375, "objective/train/weight_avg": 1.0008169412612915, "objective/train/weighted_lm_loss": 1.460231065750122, "objective/train/weights_max": 1.3257553577423096, "objective/train/weights_min": 0.3682669997215271, "theoretical_loss": 3.6020820133181934, "tokens_seen": 1251737600 }, { "epoch": 0.38, "learning_rate": 0.0006269459155833734, "loss": 0.0709, "theoretical_loss": 3.6020820133181934, "tokens_seen": 1251737600 }, { "epoch": 0.38, "learning_rate": 0.0006268656716417911, "loss": 0.0742, "theoretical_loss": 3.6020137248405435, "tokens_seen": 1251999744 }, { "epoch": 0.38, "learning_rate": 0.0006267854277002086, "loss": 0.0733, "theoretical_loss": 3.6019454546622107, "tokens_seen": 1252261888 }, { "epoch": 0.38, "learning_rate": 0.0006267051837586262, "loss": 0.0712, "theoretical_loss": 3.601877202774463, "tokens_seen": 1252524032 }, { "epoch": 0.38, "learning_rate": 0.0006266249398170438, "loss": 0.0727, "theoretical_loss": 3.601808969168573, "tokens_seen": 1252786176 }, { "epoch": 0.38, "learning_rate": 0.0006265446958754613, "loss": 0.0721, "theoretical_loss": 3.60174075383582, "tokens_seen": 1253048320 }, { "epoch": 0.38, "learning_rate": 0.0006264644519338791, "loss": 0.0753, "theoretical_loss": 3.6016725567674883, "tokens_seen": 1253310464 }, { "epoch": 0.38, "learning_rate": 0.0006263842079922966, "loss": 0.0728, "theoretical_loss": 3.60160437795487, "tokens_seen": 1253572608 }, { "epoch": 0.38, "learning_rate": 0.0006263039640507142, "loss": 0.073, "theoretical_loss": 3.6015362173892624, "tokens_seen": 1253834752 }, { "epoch": 0.38, "learning_rate": 0.0006262237201091318, "loss": 0.0745, "theoretical_loss": 3.6014680750619674, "tokens_seen": 1254096896 }, { "epoch": 0.38, "learning_rate": 0.0006261434761675494, "loss": 0.0726, "theoretical_loss": 3.6013999509642947, "tokens_seen": 1254359040 }, { "epoch": 0.38, "learning_rate": 0.0006260632322259669, "loss": 0.0723, "theoretical_loss": 3.601331845087559, "tokens_seen": 1254621184 }, { "epoch": 0.38, "learning_rate": 0.0006259829882843845, "loss": 0.073, "theoretical_loss": 3.601263757423082, "tokens_seen": 1254883328 }, { "epoch": 0.38, "learning_rate": 0.0006259027443428021, "loss": 0.073, "theoretical_loss": 3.601195687962189, "tokens_seen": 1255145472 }, { "epoch": 0.38, "learning_rate": 0.0006258225004012196, "loss": 0.0718, "theoretical_loss": 3.601127636696214, "tokens_seen": 1255407616 }, { "epoch": 0.38, "learning_rate": 0.0006257422564596374, "loss": 0.0725, "theoretical_loss": 3.6010596036164957, "tokens_seen": 1255669760 }, { "epoch": 0.38, "learning_rate": 0.0006256620125180549, "loss": 0.0724, "theoretical_loss": 3.600991588714378, "tokens_seen": 1255931904 }, { "epoch": 0.38, "learning_rate": 0.0006255817685764725, "loss": 0.0705, "theoretical_loss": 3.6009235919812124, "tokens_seen": 1256194048 }, { "epoch": 0.38, "learning_rate": 0.0006255015246348901, "loss": 0.0717, "theoretical_loss": 3.6008556134083545, "tokens_seen": 1256456192 }, { "epoch": 0.38, "learning_rate": 0.0006254212806933076, "loss": 0.0733, "theoretical_loss": 3.600787652987167, "tokens_seen": 1256718336 }, { "epoch": 0.38, "learning_rate": 0.0006253410367517252, "loss": 0.0717, "theoretical_loss": 3.6007197107090185, "tokens_seen": 1256980480 }, { "epoch": 0.38, "learning_rate": 0.0006252607928101428, "loss": 0.0736, "theoretical_loss": 3.600651786565283, "tokens_seen": 1257242624 }, { "epoch": 0.38, "learning_rate": 0.0006251805488685604, "loss": 0.0718, "theoretical_loss": 3.6005838805473402, "tokens_seen": 1257504768 }, { "epoch": 0.38, "learning_rate": 0.000625100304926978, "loss": 0.0717, "theoretical_loss": 3.600515992646577, "tokens_seen": 1257766912 }, { "epoch": 0.38, "learning_rate": 0.0006250200609853957, "loss": 0.0719, "theoretical_loss": 3.6004481228543854, "tokens_seen": 1258029056 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0012113068951293826, "objective/train/docs_used": 460354, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4824901819229126, "objective/train/original_loss": 1.482490062713623, "objective/train/theoretical_loss": 3.600380271162162, "objective/train/tokens_used": 1278751200, "objective/train/value_avg": -0.01016998291015625, "objective/train/value_loss": 0.00046331010526046157, "objective/train/value_max": -0.00010889768600463867, "objective/train/value_min": -0.97119140625, "objective/train/value_reward_corr": 0.7243134544742209, "objective/train/value_std": 0.0211181640625, "objective/train/weight_avg": 1.0014119148254395, "objective/train/weighted_lm_loss": 1.484576940536499, "objective/train/weights_max": 1.8089985847473145, "objective/train/weights_min": 0.36829686164855957, "theoretical_loss": 3.600380271162162, "tokens_seen": 1258291200 }, { "epoch": 0.38, "learning_rate": 0.0006249398170438132, "loss": 0.0709, "theoretical_loss": 3.600380271162162, "tokens_seen": 1258291200 }, { "epoch": 0.38, "learning_rate": 0.0006248595731022309, "loss": 0.0693, "theoretical_loss": 3.6003124375613114, "tokens_seen": 1258553344 }, { "epoch": 0.38, "learning_rate": 0.0006247793291606484, "loss": 0.0746, "theoretical_loss": 3.6002446220432427, "tokens_seen": 1258815488 }, { "epoch": 0.38, "learning_rate": 0.0006246990852190659, "loss": 0.074, "theoretical_loss": 3.6001768245993713, "tokens_seen": 1259077632 }, { "epoch": 0.38, "learning_rate": 0.0006246188412774836, "loss": 0.0731, "theoretical_loss": 3.600109045221119, "tokens_seen": 1259339776 }, { "epoch": 0.38, "learning_rate": 0.0006245385973359011, "loss": 0.0749, "theoretical_loss": 3.600041283899912, "tokens_seen": 1259601920 }, { "epoch": 0.38, "learning_rate": 0.0006244583533943187, "loss": 0.0713, "theoretical_loss": 3.599973540627184, "tokens_seen": 1259864064 }, { "epoch": 0.38, "learning_rate": 0.0006243781094527363, "loss": 0.0742, "theoretical_loss": 3.599905815394374, "tokens_seen": 1260126208 }, { "epoch": 0.38, "learning_rate": 0.000624297865511154, "loss": 0.0736, "theoretical_loss": 3.599838108192926, "tokens_seen": 1260388352 }, { "epoch": 0.38, "learning_rate": 0.0006242176215695715, "loss": 0.0724, "theoretical_loss": 3.5997704190142907, "tokens_seen": 1260650496 }, { "epoch": 0.38, "learning_rate": 0.0006241373776279891, "loss": 0.0741, "theoretical_loss": 3.599702747849925, "tokens_seen": 1260912640 }, { "epoch": 0.38, "learning_rate": 0.0006240571336864067, "loss": 0.073, "theoretical_loss": 3.5996350946912896, "tokens_seen": 1261174784 }, { "epoch": 0.38, "learning_rate": 0.0006239768897448243, "loss": 0.0715, "theoretical_loss": 3.5995674595298537, "tokens_seen": 1261436928 }, { "epoch": 0.38, "learning_rate": 0.0006238966458032419, "loss": 0.072, "theoretical_loss": 3.5994998423570914, "tokens_seen": 1261699072 }, { "epoch": 0.38, "learning_rate": 0.0006238164018616594, "loss": 0.0714, "theoretical_loss": 3.599432243164481, "tokens_seen": 1261961216 }, { "epoch": 0.38, "learning_rate": 0.0006237361579200771, "loss": 0.0713, "theoretical_loss": 3.599364661943509, "tokens_seen": 1262223360 }, { "epoch": 0.38, "learning_rate": 0.0006236559139784946, "loss": 0.0726, "theoretical_loss": 3.599297098685666, "tokens_seen": 1262485504 }, { "epoch": 0.38, "learning_rate": 0.0006235756700369121, "loss": 0.071, "theoretical_loss": 3.599229553382449, "tokens_seen": 1262747648 }, { "epoch": 0.38, "learning_rate": 0.0006234954260953299, "loss": 0.0725, "theoretical_loss": 3.599162026025361, "tokens_seen": 1263009792 }, { "epoch": 0.38, "learning_rate": 0.0006234151821537474, "loss": 0.0705, "theoretical_loss": 3.599094516605911, "tokens_seen": 1263271936 }, { "epoch": 0.38, "learning_rate": 0.000623334938212165, "loss": 0.0738, "theoretical_loss": 3.5990270251156122, "tokens_seen": 1263534080 }, { "epoch": 0.38, "learning_rate": 0.0006232546942705826, "loss": 0.0746, "theoretical_loss": 3.5989595515459856, "tokens_seen": 1263796224 }, { "epoch": 0.38, "learning_rate": 0.0006231744503290002, "loss": 0.0743, "theoretical_loss": 3.598892095888557, "tokens_seen": 1264058368 }, { "epoch": 0.38, "learning_rate": 0.0006230942063874177, "loss": 0.0716, "theoretical_loss": 3.5988246581348573, "tokens_seen": 1264320512 }, { "epoch": 0.38, "learning_rate": 0.0006230139624458353, "loss": 0.073, "theoretical_loss": 3.5987572382764252, "tokens_seen": 1264582656 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0005436477949842811, "objective/train/docs_used": 462829, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.450318455696106, "objective/train/original_loss": 1.4503185749053955, "objective/train/theoretical_loss": 3.5986898363048025, "objective/train/tokens_used": 1285304800, "objective/train/value_avg": -0.007289886474609375, "objective/train/value_loss": 0.00032011562143452466, "objective/train/value_max": -8.285045623779297e-05, "objective/train/value_min": -0.841796875, "objective/train/value_reward_corr": 0.5236847438293369, "objective/train/value_std": 0.011871337890625, "objective/train/weight_avg": 1.0006617307662964, "objective/train/weighted_lm_loss": 1.4513938426971436, "objective/train/weights_max": 1.2407280206680298, "objective/train/weights_min": 0.05488771200180054, "theoretical_loss": 3.5986898363048025, "tokens_seen": 1264844800 }, { "epoch": 0.38, "learning_rate": 0.0006229337185042529, "loss": 0.0724, "theoretical_loss": 3.5986898363048025, "tokens_seen": 1264844800 }, { "epoch": 0.38, "learning_rate": 0.0006228534745626704, "loss": 0.0713, "theoretical_loss": 3.5986224522115395, "tokens_seen": 1265106944 }, { "epoch": 0.38, "learning_rate": 0.0006227732306210882, "loss": 0.0704, "theoretical_loss": 3.59855508598819, "tokens_seen": 1265369088 }, { "epoch": 0.38, "learning_rate": 0.0006226929866795057, "loss": 0.0736, "theoretical_loss": 3.5984877376263142, "tokens_seen": 1265631232 }, { "epoch": 0.38, "learning_rate": 0.0006226127427379234, "loss": 0.0699, "theoretical_loss": 3.5984204071174783, "tokens_seen": 1265893376 }, { "epoch": 0.38, "learning_rate": 0.0006225324987963409, "loss": 0.0763, "theoretical_loss": 3.598353094453255, "tokens_seen": 1266155520 }, { "epoch": 0.38, "learning_rate": 0.0006224522548547584, "loss": 0.0724, "theoretical_loss": 3.598285799625221, "tokens_seen": 1266417664 }, { "epoch": 0.38, "learning_rate": 0.0006223720109131761, "loss": 0.072, "theoretical_loss": 3.5982185226249594, "tokens_seen": 1266679808 }, { "epoch": 0.38, "learning_rate": 0.0006222917669715936, "loss": 0.0718, "theoretical_loss": 3.5981512634440604, "tokens_seen": 1266941952 }, { "epoch": 0.38, "learning_rate": 0.0006222115230300112, "loss": 0.0725, "theoretical_loss": 3.5980840220741177, "tokens_seen": 1267204096 }, { "epoch": 0.38, "learning_rate": 0.0006221312790884289, "loss": 0.0748, "theoretical_loss": 3.5980167985067317, "tokens_seen": 1267466240 }, { "epoch": 0.38, "learning_rate": 0.0006220510351468465, "loss": 0.074, "theoretical_loss": 3.5979495927335092, "tokens_seen": 1267728384 }, { "epoch": 0.38, "learning_rate": 0.000621970791205264, "loss": 0.0727, "theoretical_loss": 3.5978824047460614, "tokens_seen": 1267990528 }, { "epoch": 0.38, "learning_rate": 0.0006218905472636816, "loss": 0.0732, "theoretical_loss": 3.5978152345360055, "tokens_seen": 1268252672 }, { "epoch": 0.38, "learning_rate": 0.0006218103033220992, "loss": 0.0723, "theoretical_loss": 3.5977480820949657, "tokens_seen": 1268514816 }, { "epoch": 0.38, "learning_rate": 0.0006217300593805167, "loss": 0.0741, "theoretical_loss": 3.5976809474145703, "tokens_seen": 1268776960 }, { "epoch": 0.38, "learning_rate": 0.0006216498154389344, "loss": 0.073, "theoretical_loss": 3.597613830486454, "tokens_seen": 1269039104 }, { "epoch": 0.38, "learning_rate": 0.0006215695714973519, "loss": 0.0717, "theoretical_loss": 3.5975467313022564, "tokens_seen": 1269301248 }, { "epoch": 0.38, "learning_rate": 0.0006214893275557696, "loss": 0.0734, "theoretical_loss": 3.5974796498536237, "tokens_seen": 1269563392 }, { "epoch": 0.38, "learning_rate": 0.0006214090836141871, "loss": 0.0708, "theoretical_loss": 3.597412586132208, "tokens_seen": 1269825536 }, { "epoch": 0.38, "learning_rate": 0.0006213288396726048, "loss": 0.0728, "theoretical_loss": 3.5973455401296652, "tokens_seen": 1270087680 }, { "epoch": 0.38, "learning_rate": 0.0006212485957310224, "loss": 0.0698, "theoretical_loss": 3.597278511837659, "tokens_seen": 1270349824 }, { "epoch": 0.39, "learning_rate": 0.0006211683517894399, "loss": 0.0713, "theoretical_loss": 3.597211501247858, "tokens_seen": 1270611968 }, { "epoch": 0.39, "learning_rate": 0.0006210881078478575, "loss": 0.07, "theoretical_loss": 3.5971445083519358, "tokens_seen": 1270874112 }, { "epoch": 0.39, "learning_rate": 0.0006210078639062751, "loss": 0.0732, "theoretical_loss": 3.5970775331415723, "tokens_seen": 1271136256 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0007621328113600612, "objective/train/docs_used": 465264, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4400675296783447, "objective/train/original_loss": 1.4400676488876343, "objective/train/theoretical_loss": 3.5970105756084525, "objective/train/tokens_used": 1291858400, "objective/train/value_avg": -0.007965087890625, "objective/train/value_loss": 0.00021634330914821476, "objective/train/value_max": -9.101629257202148e-05, "objective/train/value_min": -0.335205078125, "objective/train/value_reward_corr": 0.6737940618129363, "objective/train/value_std": 0.0167388916015625, "objective/train/weight_avg": 1.0008677244186401, "objective/train/weighted_lm_loss": 1.4404685497283936, "objective/train/weights_max": 1.2927756309509277, "objective/train/weights_min": 0.37808898091316223, "theoretical_loss": 3.5970105756084525, "tokens_seen": 1271398400 }, { "epoch": 0.39, "learning_rate": 0.0006209276199646927, "loss": 0.0703, "theoretical_loss": 3.5970105756084525, "tokens_seen": 1271398400 }, { "epoch": 0.39, "learning_rate": 0.0006208473760231102, "loss": 0.0727, "theoretical_loss": 3.5969436357442683, "tokens_seen": 1271660544 }, { "epoch": 0.39, "learning_rate": 0.0006207671320815279, "loss": 0.0719, "theoretical_loss": 3.5968767135407154, "tokens_seen": 1271922688 }, { "epoch": 0.39, "learning_rate": 0.0006206868881399454, "loss": 0.0712, "theoretical_loss": 3.596809808989496, "tokens_seen": 1272184832 }, { "epoch": 0.39, "learning_rate": 0.000620606644198363, "loss": 0.0727, "theoretical_loss": 3.596742922082319, "tokens_seen": 1272446976 }, { "epoch": 0.39, "learning_rate": 0.0006205264002567807, "loss": 0.0704, "theoretical_loss": 3.5966760528108965, "tokens_seen": 1272709120 }, { "epoch": 0.39, "learning_rate": 0.0006204461563151982, "loss": 0.0725, "theoretical_loss": 3.596609201166948, "tokens_seen": 1272971264 }, { "epoch": 0.39, "learning_rate": 0.0006203659123736158, "loss": 0.0726, "theoretical_loss": 3.596542367142198, "tokens_seen": 1273233408 }, { "epoch": 0.39, "learning_rate": 0.0006202856684320334, "loss": 0.0725, "theoretical_loss": 3.5964755507283774, "tokens_seen": 1273495552 }, { "epoch": 0.39, "learning_rate": 0.000620205424490451, "loss": 0.0737, "theoretical_loss": 3.5964087519172203, "tokens_seen": 1273757696 }, { "epoch": 0.39, "learning_rate": 0.0006201251805488686, "loss": 0.0744, "theoretical_loss": 3.59634197070047, "tokens_seen": 1274019840 }, { "epoch": 0.39, "learning_rate": 0.0006200449366072861, "loss": 0.075, "theoretical_loss": 3.5962752070698727, "tokens_seen": 1274281984 }, { "epoch": 0.39, "learning_rate": 0.0006199646926657037, "loss": 0.0693, "theoretical_loss": 3.59620846101718, "tokens_seen": 1274544128 }, { "epoch": 0.39, "learning_rate": 0.0006198844487241214, "loss": 0.0726, "theoretical_loss": 3.596141732534151, "tokens_seen": 1274806272 }, { "epoch": 0.39, "learning_rate": 0.000619804204782539, "loss": 0.0749, "theoretical_loss": 3.596075021612549, "tokens_seen": 1275068416 }, { "epoch": 0.39, "learning_rate": 0.0006197239608409565, "loss": 0.0739, "theoretical_loss": 3.5960083282441433, "tokens_seen": 1275330560 }, { "epoch": 0.39, "learning_rate": 0.0006196437168993742, "loss": 0.0741, "theoretical_loss": 3.5959416524207084, "tokens_seen": 1275592704 }, { "epoch": 0.39, "learning_rate": 0.0006195634729577917, "loss": 0.0732, "theoretical_loss": 3.5958749941340242, "tokens_seen": 1275854848 }, { "epoch": 0.39, "learning_rate": 0.0006194832290162092, "loss": 0.0735, "theoretical_loss": 3.595808353375877, "tokens_seen": 1276116992 }, { "epoch": 0.39, "learning_rate": 0.0006194029850746269, "loss": 0.0723, "theoretical_loss": 3.5957417301380588, "tokens_seen": 1276379136 }, { "epoch": 0.39, "learning_rate": 0.0006193227411330444, "loss": 0.0735, "theoretical_loss": 3.595675124412365, "tokens_seen": 1276641280 }, { "epoch": 0.39, "learning_rate": 0.000619242497191462, "loss": 0.0762, "theoretical_loss": 3.595608536190599, "tokens_seen": 1276903424 }, { "epoch": 0.39, "learning_rate": 0.0006191622532498797, "loss": 0.0724, "theoretical_loss": 3.595541965464568, "tokens_seen": 1277165568 }, { "epoch": 0.39, "learning_rate": 0.0006190820093082973, "loss": 0.0702, "theoretical_loss": 3.5954754122260866, "tokens_seen": 1277427712 }, { "epoch": 0.39, "learning_rate": 0.0006190017653667148, "loss": 0.0739, "theoretical_loss": 3.595408876466972, "tokens_seen": 1277689856 }, { "epoch": 0.39, "objective/train/advantage_avg": -0.0006468091160058975, "objective/train/docs_used": 467639, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4236520528793335, "objective/train/original_loss": 1.423651933670044, "objective/train/theoretical_loss": 3.59534235817905, "objective/train/tokens_used": 1298412000, "objective/train/value_avg": -0.0084686279296875, "objective/train/value_loss": 0.0006970210233703256, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.8544921875, "objective/train/value_reward_corr": 0.7078890831361996, "objective/train/value_std": 0.022552490234375, "objective/train/weight_avg": 0.9996680021286011, "objective/train/weighted_lm_loss": 1.4211854934692383, "objective/train/weights_max": 2.232908010482788, "objective/train/weights_min": 0.36951375007629395, "theoretical_loss": 3.59534235817905, "tokens_seen": 1277952000 }, { "epoch": 0.39, "learning_rate": 0.0006189215214251324, "loss": 0.0727, "theoretical_loss": 3.59534235817905, "tokens_seen": 1277952000 }, { "epoch": 0.39, "learning_rate": 0.00061884127748355, "loss": 0.0732, "theoretical_loss": 3.5952758573541503, "tokens_seen": 1278214144 }, { "epoch": 0.39, "learning_rate": 0.0006187610335419676, "loss": 0.0722, "theoretical_loss": 3.5952093739841073, "tokens_seen": 1278476288 }, { "epoch": 0.39, "learning_rate": 0.0006186807896003852, "loss": 0.0743, "theoretical_loss": 3.5951429080607635, "tokens_seen": 1278738432 }, { "epoch": 0.39, "learning_rate": 0.0006186005456588027, "loss": 0.0717, "theoretical_loss": 3.5950764595759646, "tokens_seen": 1279000576 }, { "epoch": 0.39, "learning_rate": 0.0006185203017172204, "loss": 0.0725, "theoretical_loss": 3.5950100285215623, "tokens_seen": 1279262720 }, { "epoch": 0.39, "learning_rate": 0.000618440057775638, "loss": 0.0704, "theoretical_loss": 3.594943614889414, "tokens_seen": 1279524864 }, { "epoch": 0.39, "learning_rate": 0.0006183598138340556, "loss": 0.0745, "theoretical_loss": 3.5948772186713827, "tokens_seen": 1279787008 }, { "epoch": 0.39, "learning_rate": 0.0006182795698924732, "loss": 0.0727, "theoretical_loss": 3.5948108398593357, "tokens_seen": 1280049152 }, { "epoch": 0.39, "learning_rate": 0.0006181993259508907, "loss": 0.0735, "theoretical_loss": 3.5947444784451488, "tokens_seen": 1280311296 }, { "epoch": 0.39, "learning_rate": 0.0006181190820093083, "loss": 0.0715, "theoretical_loss": 3.5946781344206995, "tokens_seen": 1280573440 }, { "epoch": 0.39, "learning_rate": 0.0006180388380677259, "loss": 0.0745, "theoretical_loss": 3.5946118077778735, "tokens_seen": 1280835584 }, { "epoch": 0.39, "learning_rate": 0.0006179585941261435, "loss": 0.0758, "theoretical_loss": 3.59454549850856, "tokens_seen": 1281097728 }, { "epoch": 0.39, "learning_rate": 0.000617878350184561, "loss": 0.0736, "theoretical_loss": 3.594479206604655, "tokens_seen": 1281359872 }, { "epoch": 0.39, "learning_rate": 0.0006177981062429787, "loss": 0.0746, "theoretical_loss": 3.59441293205806, "tokens_seen": 1281622016 }, { "epoch": 0.39, "learning_rate": 0.0006177178623013962, "loss": 0.0724, "theoretical_loss": 3.59434667486068, "tokens_seen": 1281884160 }, { "epoch": 0.39, "learning_rate": 0.0006176376183598139, "loss": 0.0726, "theoretical_loss": 3.5942804350044284, "tokens_seen": 1282146304 }, { "epoch": 0.39, "learning_rate": 0.0006175573744182315, "loss": 0.0723, "theoretical_loss": 3.594214212481222, "tokens_seen": 1282408448 }, { "epoch": 0.39, "learning_rate": 0.000617477130476649, "loss": 0.07, "theoretical_loss": 3.594148007282983, "tokens_seen": 1282670592 }, { "epoch": 0.39, "learning_rate": 0.0006173968865350667, "loss": 0.0673, "theoretical_loss": 3.59408181940164, "tokens_seen": 1282932736 }, { "epoch": 0.39, "learning_rate": 0.0006173166425934842, "loss": 0.0711, "theoretical_loss": 3.5940156488291266, "tokens_seen": 1283194880 }, { "epoch": 0.39, "learning_rate": 0.0006172363986519018, "loss": 0.0704, "theoretical_loss": 3.593949495557381, "tokens_seen": 1283457024 }, { "epoch": 0.39, "learning_rate": 0.0006171561547103194, "loss": 0.0721, "theoretical_loss": 3.5938833595783484, "tokens_seen": 1283719168 }, { "epoch": 0.39, "learning_rate": 0.0006170759107687369, "loss": 0.0723, "theoretical_loss": 3.593817240883978, "tokens_seen": 1283981312 }, { "epoch": 0.39, "learning_rate": 0.0006169956668271545, "loss": 0.072, "theoretical_loss": 3.5937511394662254, "tokens_seen": 1284243456 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0013448818353936076, "objective/train/docs_used": 470107, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3530259132385254, "objective/train/original_loss": 1.3530259132385254, "objective/train/theoretical_loss": 3.5936850553170503, "objective/train/tokens_used": 1304965600, "objective/train/value_avg": -0.0072174072265625, "objective/train/value_loss": 0.00014462636318057775, "objective/train/value_max": -8.887052536010742e-05, "objective/train/value_min": -0.44189453125, "objective/train/value_reward_corr": 0.6305170353978925, "objective/train/value_std": 0.011138916015625, "objective/train/weight_avg": 1.0014116764068604, "objective/train/weighted_lm_loss": 1.3553401231765747, "objective/train/weights_max": 1.2892757654190063, "objective/train/weights_min": 0.39345037937164307, "theoretical_loss": 3.5936850553170503, "tokens_seen": 1284505600 }, { "epoch": 0.39, "learning_rate": 0.0006169154228855722, "loss": 0.0713, "theoretical_loss": 3.5936850553170503, "tokens_seen": 1284505600 }, { "epoch": 0.39, "learning_rate": 0.0006168351789439898, "loss": 0.0734, "theoretical_loss": 3.593618988428419, "tokens_seen": 1284767744 }, { "epoch": 0.39, "learning_rate": 0.0006167549350024073, "loss": 0.07, "theoretical_loss": 3.593552938792303, "tokens_seen": 1285029888 }, { "epoch": 0.39, "learning_rate": 0.000616674691060825, "loss": 0.0716, "theoretical_loss": 3.5934869064006785, "tokens_seen": 1285292032 }, { "epoch": 0.39, "learning_rate": 0.0006165944471192425, "loss": 0.0734, "theoretical_loss": 3.593420891245527, "tokens_seen": 1285554176 }, { "epoch": 0.39, "learning_rate": 0.00061651420317766, "loss": 0.0748, "theoretical_loss": 3.5933548933188377, "tokens_seen": 1285816320 }, { "epoch": 0.39, "learning_rate": 0.0006164339592360777, "loss": 0.0713, "theoretical_loss": 3.5932889126126017, "tokens_seen": 1286078464 }, { "epoch": 0.39, "learning_rate": 0.0006163537152944952, "loss": 0.0738, "theoretical_loss": 3.5932229491188172, "tokens_seen": 1286340608 }, { "epoch": 0.39, "learning_rate": 0.0006162734713529129, "loss": 0.0708, "theoretical_loss": 3.5931570028294884, "tokens_seen": 1286602752 }, { "epoch": 0.39, "learning_rate": 0.0006161932274113304, "loss": 0.0719, "theoretical_loss": 3.5930910737366233, "tokens_seen": 1286864896 }, { "epoch": 0.39, "learning_rate": 0.0006161129834697481, "loss": 0.0731, "theoretical_loss": 3.5930251618322364, "tokens_seen": 1287127040 }, { "epoch": 0.39, "learning_rate": 0.0006160327395281657, "loss": 0.072, "theoretical_loss": 3.5929592671083466, "tokens_seen": 1287389184 }, { "epoch": 0.39, "learning_rate": 0.0006159524955865832, "loss": 0.0716, "theoretical_loss": 3.5928933895569792, "tokens_seen": 1287651328 }, { "epoch": 0.39, "learning_rate": 0.0006158722516450008, "loss": 0.07, "theoretical_loss": 3.5928275291701643, "tokens_seen": 1287913472 }, { "epoch": 0.39, "learning_rate": 0.0006157920077034184, "loss": 0.071, "theoretical_loss": 3.592761685939937, "tokens_seen": 1288175616 }, { "epoch": 0.39, "learning_rate": 0.000615711763761836, "loss": 0.0717, "theoretical_loss": 3.5926958598583383, "tokens_seen": 1288437760 }, { "epoch": 0.39, "learning_rate": 0.0006156315198202535, "loss": 0.0692, "theoretical_loss": 3.5926300509174136, "tokens_seen": 1288699904 }, { "epoch": 0.39, "learning_rate": 0.0006155512758786712, "loss": 0.0711, "theoretical_loss": 3.5925642591092153, "tokens_seen": 1288962048 }, { "epoch": 0.39, "learning_rate": 0.0006154710319370887, "loss": 0.0694, "theoretical_loss": 3.592498484425799, "tokens_seen": 1289224192 }, { "epoch": 0.39, "learning_rate": 0.0006153907879955064, "loss": 0.0737, "theoretical_loss": 3.5924327268592267, "tokens_seen": 1289486336 }, { "epoch": 0.39, "learning_rate": 0.000615310544053924, "loss": 0.0696, "theoretical_loss": 3.5923669864015664, "tokens_seen": 1289748480 }, { "epoch": 0.39, "learning_rate": 0.0006152303001123415, "loss": 0.0699, "theoretical_loss": 3.59230126304489, "tokens_seen": 1290010624 }, { "epoch": 0.39, "learning_rate": 0.0006151500561707592, "loss": 0.0732, "theoretical_loss": 3.592235556781276, "tokens_seen": 1290272768 }, { "epoch": 0.39, "learning_rate": 0.0006150698122291767, "loss": 0.0705, "theoretical_loss": 3.5921698676028058, "tokens_seen": 1290534912 }, { "epoch": 0.39, "learning_rate": 0.0006149895682875943, "loss": 0.0684, "theoretical_loss": 3.59210419550157, "tokens_seen": 1290797056 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0011514985235407948, "objective/train/docs_used": 472355, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4526315927505493, "objective/train/original_loss": 1.4526317119598389, "objective/train/theoretical_loss": 3.5920385404696606, "objective/train/tokens_used": 1311519200, "objective/train/value_avg": -0.007213592529296875, "objective/train/value_loss": 0.00013709472841583192, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.9716796875, "objective/train/value_reward_corr": 0.8126945195500097, "objective/train/value_std": 0.01486968994140625, "objective/train/weight_avg": 1.0012160539627075, "objective/train/weighted_lm_loss": 1.4547836780548096, "objective/train/weights_max": 1.2823024988174438, "objective/train/weights_min": 0.5326542258262634, "theoretical_loss": 3.5920385404696606, "tokens_seen": 1291059200 }, { "epoch": 0.39, "learning_rate": 0.0006149093243460119, "loss": 0.0722, "theoretical_loss": 3.5920385404696606, "tokens_seen": 1291059200 }, { "epoch": 0.39, "learning_rate": 0.0006148290804044295, "loss": 0.0715, "theoretical_loss": 3.591972902499177, "tokens_seen": 1291321344 }, { "epoch": 0.39, "learning_rate": 0.000614748836462847, "loss": 0.0714, "theoretical_loss": 3.5919072815822233, "tokens_seen": 1291583488 }, { "epoch": 0.39, "learning_rate": 0.0006146685925212647, "loss": 0.0713, "theoretical_loss": 3.5918416777109092, "tokens_seen": 1291845632 }, { "epoch": 0.39, "learning_rate": 0.0006145883485796823, "loss": 0.0698, "theoretical_loss": 3.591776090877349, "tokens_seen": 1292107776 }, { "epoch": 0.39, "learning_rate": 0.0006145081046380998, "loss": 0.0753, "theoretical_loss": 3.5917105210736624, "tokens_seen": 1292369920 }, { "epoch": 0.39, "learning_rate": 0.0006144278606965175, "loss": 0.0718, "theoretical_loss": 3.591644968291975, "tokens_seen": 1292632064 }, { "epoch": 0.39, "learning_rate": 0.000614347616754935, "loss": 0.0707, "theoretical_loss": 3.5915794325244175, "tokens_seen": 1292894208 }, { "epoch": 0.39, "learning_rate": 0.0006142673728133526, "loss": 0.0722, "theoretical_loss": 3.5915139137631242, "tokens_seen": 1293156352 }, { "epoch": 0.39, "learning_rate": 0.0006141871288717702, "loss": 0.07, "theoretical_loss": 3.5914484120002372, "tokens_seen": 1293418496 }, { "epoch": 0.39, "learning_rate": 0.0006141068849301877, "loss": 0.0718, "theoretical_loss": 3.591382927227902, "tokens_seen": 1293680640 }, { "epoch": 0.39, "learning_rate": 0.0006140266409886053, "loss": 0.0747, "theoretical_loss": 3.5913174594382693, "tokens_seen": 1293942784 }, { "epoch": 0.39, "learning_rate": 0.000613946397047023, "loss": 0.0725, "theoretical_loss": 3.5912520086234974, "tokens_seen": 1294204928 }, { "epoch": 0.39, "learning_rate": 0.0006138661531054406, "loss": 0.0733, "theoretical_loss": 3.5911865747757457, "tokens_seen": 1294467072 }, { "epoch": 0.39, "learning_rate": 0.0006137859091638582, "loss": 0.0715, "theoretical_loss": 3.5911211578871827, "tokens_seen": 1294729216 }, { "epoch": 0.39, "learning_rate": 0.0006137056652222758, "loss": 0.0701, "theoretical_loss": 3.591055757949981, "tokens_seen": 1294991360 }, { "epoch": 0.39, "learning_rate": 0.0006136254212806933, "loss": 0.0739, "theoretical_loss": 3.590990374956316, "tokens_seen": 1295253504 }, { "epoch": 0.39, "learning_rate": 0.0006135451773391109, "loss": 0.0739, "theoretical_loss": 3.5909250088983713, "tokens_seen": 1295515648 }, { "epoch": 0.39, "learning_rate": 0.0006134649333975285, "loss": 0.074, "theoretical_loss": 3.5908596597683347, "tokens_seen": 1295777792 }, { "epoch": 0.39, "learning_rate": 0.000613384689455946, "loss": 0.0713, "theoretical_loss": 3.590794327558399, "tokens_seen": 1296039936 }, { "epoch": 0.39, "learning_rate": 0.0006133044455143637, "loss": 0.0697, "theoretical_loss": 3.590729012260762, "tokens_seen": 1296302080 }, { "epoch": 0.39, "learning_rate": 0.0006132242015727812, "loss": 0.0739, "theoretical_loss": 3.5906637138676265, "tokens_seen": 1296564224 }, { "epoch": 0.39, "learning_rate": 0.0006131439576311989, "loss": 0.0708, "theoretical_loss": 3.590598432371202, "tokens_seen": 1296826368 }, { "epoch": 0.39, "learning_rate": 0.0006130637136896165, "loss": 0.0705, "theoretical_loss": 3.5905331677637013, "tokens_seen": 1297088512 }, { "epoch": 0.39, "learning_rate": 0.000612983469748034, "loss": 0.0712, "theoretical_loss": 3.5904679200373435, "tokens_seen": 1297350656 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0005244042840786278, "objective/train/docs_used": 474727, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4231724739074707, "objective/train/original_loss": 1.4231724739074707, "objective/train/theoretical_loss": 3.5904026891843523, "objective/train/tokens_used": 1318072800, "objective/train/value_avg": -0.006511688232421875, "objective/train/value_loss": 0.00018917610577773303, "objective/train/value_max": -5.1856040954589844e-05, "objective/train/value_min": -0.394287109375, "objective/train/value_reward_corr": 0.6175634721862767, "objective/train/value_std": 0.0108795166015625, "objective/train/weight_avg": 1.000605583190918, "objective/train/weighted_lm_loss": 1.4241219758987427, "objective/train/weights_max": 1.4251643419265747, "objective/train/weights_min": 0.23235876858234406, "theoretical_loss": 3.5904026891843523, "tokens_seen": 1297612800 }, { "epoch": 0.39, "learning_rate": 0.0006129032258064516, "loss": 0.0701, "theoretical_loss": 3.5904026891843523, "tokens_seen": 1297612800 }, { "epoch": 0.39, "learning_rate": 0.0006128229818648692, "loss": 0.0683, "theoretical_loss": 3.5903374751969563, "tokens_seen": 1297874944 }, { "epoch": 0.39, "learning_rate": 0.0006127427379232868, "loss": 0.0687, "theoretical_loss": 3.59027227806739, "tokens_seen": 1298137088 }, { "epoch": 0.39, "learning_rate": 0.0006126624939817043, "loss": 0.0709, "theoretical_loss": 3.5902070977878937, "tokens_seen": 1298399232 }, { "epoch": 0.39, "learning_rate": 0.000612582250040122, "loss": 0.07, "theoretical_loss": 3.5901419343507106, "tokens_seen": 1298661376 }, { "epoch": 0.39, "learning_rate": 0.0006125020060985395, "loss": 0.0704, "theoretical_loss": 3.5900767877480906, "tokens_seen": 1298923520 }, { "epoch": 0.39, "learning_rate": 0.0006124217621569572, "loss": 0.0695, "theoretical_loss": 3.5900116579722883, "tokens_seen": 1299185664 }, { "epoch": 0.39, "learning_rate": 0.0006123415182153748, "loss": 0.0715, "theoretical_loss": 3.5899465450155637, "tokens_seen": 1299447808 }, { "epoch": 0.39, "learning_rate": 0.0006122612742737923, "loss": 0.0724, "theoretical_loss": 3.589881448870182, "tokens_seen": 1299709952 }, { "epoch": 0.39, "learning_rate": 0.00061218103033221, "loss": 0.0722, "theoretical_loss": 3.589816369528413, "tokens_seen": 1299972096 }, { "epoch": 0.39, "learning_rate": 0.0006121007863906275, "loss": 0.0702, "theoretical_loss": 3.5897513069825324, "tokens_seen": 1300234240 }, { "epoch": 0.39, "learning_rate": 0.0006120205424490451, "loss": 0.0719, "theoretical_loss": 3.589686261224819, "tokens_seen": 1300496384 }, { "epoch": 0.39, "learning_rate": 0.0006119402985074627, "loss": 0.0717, "theoretical_loss": 3.5896212322475605, "tokens_seen": 1300758528 }, { "epoch": 0.39, "learning_rate": 0.0006118600545658803, "loss": 0.0712, "theoretical_loss": 3.589556220043046, "tokens_seen": 1301020672 }, { "epoch": 0.39, "learning_rate": 0.0006117798106242978, "loss": 0.0715, "theoretical_loss": 3.589491224603571, "tokens_seen": 1301282816 }, { "epoch": 0.39, "learning_rate": 0.0006116995666827155, "loss": 0.0714, "theoretical_loss": 3.5894262459214366, "tokens_seen": 1301544960 }, { "epoch": 0.39, "learning_rate": 0.0006116193227411331, "loss": 0.0695, "theoretical_loss": 3.589361283988948, "tokens_seen": 1301807104 }, { "epoch": 0.39, "learning_rate": 0.0006115390787995506, "loss": 0.0711, "theoretical_loss": 3.589296338798418, "tokens_seen": 1302069248 }, { "epoch": 0.39, "learning_rate": 0.0006114588348579683, "loss": 0.0724, "theoretical_loss": 3.5892314103421596, "tokens_seen": 1302331392 }, { "epoch": 0.39, "learning_rate": 0.0006113785909163858, "loss": 0.0712, "theoretical_loss": 3.589166498612496, "tokens_seen": 1302593536 }, { "epoch": 0.39, "learning_rate": 0.0006112983469748035, "loss": 0.0718, "theoretical_loss": 3.589101603601752, "tokens_seen": 1302855680 }, { "epoch": 0.39, "learning_rate": 0.000611218103033221, "loss": 0.0731, "theoretical_loss": 3.58903672530226, "tokens_seen": 1303117824 }, { "epoch": 0.39, "learning_rate": 0.0006111378590916385, "loss": 0.0719, "theoretical_loss": 3.5889718637063552, "tokens_seen": 1303379968 }, { "epoch": 0.4, "learning_rate": 0.0006110576151500562, "loss": 0.0701, "theoretical_loss": 3.5889070188063794, "tokens_seen": 1303642112 }, { "epoch": 0.4, "learning_rate": 0.0006109773712084738, "loss": 0.0733, "theoretical_loss": 3.5888421905946783, "tokens_seen": 1303904256 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.0004786965437233448, "objective/train/docs_used": 476908, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.320482850074768, "objective/train/original_loss": 1.3204827308654785, "objective/train/theoretical_loss": 3.588777379063604, "objective/train/tokens_used": 1324626400, "objective/train/value_avg": -0.007488250732421875, "objective/train/value_loss": 0.0003243642277084291, "objective/train/value_max": -8.821487426757812e-05, "objective/train/value_min": -0.79052734375, "objective/train/value_reward_corr": 0.5708697386938653, "objective/train/value_std": 0.01364898681640625, "objective/train/weight_avg": 1.0006197690963745, "objective/train/weighted_lm_loss": 1.3216181993484497, "objective/train/weights_max": 1.9110832214355469, "objective/train/weights_min": 0.368977814912796, "theoretical_loss": 3.588777379063604, "tokens_seen": 1304166400 }, { "epoch": 0.4, "learning_rate": 0.0006108971272668914, "loss": 0.0702, "theoretical_loss": 3.588777379063604, "tokens_seen": 1304166400 }, { "epoch": 0.4, "learning_rate": 0.000610816883325309, "loss": 0.074, "theoretical_loss": 3.5887125842055116, "tokens_seen": 1304428544 }, { "epoch": 0.4, "learning_rate": 0.0006107366393837266, "loss": 0.07, "theoretical_loss": 3.588647806012765, "tokens_seen": 1304690688 }, { "epoch": 0.4, "learning_rate": 0.0006106563954421441, "loss": 0.0711, "theoretical_loss": 3.588583044477728, "tokens_seen": 1304952832 }, { "epoch": 0.4, "learning_rate": 0.0006105761515005617, "loss": 0.0741, "theoretical_loss": 3.5885182995927734, "tokens_seen": 1305214976 }, { "epoch": 0.4, "learning_rate": 0.0006104959075589793, "loss": 0.071, "theoretical_loss": 3.5884535713502776, "tokens_seen": 1305477120 }, { "epoch": 0.4, "learning_rate": 0.0006104156636173968, "loss": 0.07, "theoretical_loss": 3.588388859742622, "tokens_seen": 1305739264 }, { "epoch": 0.4, "learning_rate": 0.0006103354196758145, "loss": 0.0677, "theoretical_loss": 3.5883241647621933, "tokens_seen": 1306001408 }, { "epoch": 0.4, "learning_rate": 0.000610255175734232, "loss": 0.0699, "theoretical_loss": 3.588259486401383, "tokens_seen": 1306263552 }, { "epoch": 0.4, "learning_rate": 0.0006101749317926497, "loss": 0.0711, "theoretical_loss": 3.5881948246525877, "tokens_seen": 1306525696 }, { "epoch": 0.4, "learning_rate": 0.0006100946878510673, "loss": 0.071, "theoretical_loss": 3.588130179508209, "tokens_seen": 1306787840 }, { "epoch": 0.4, "learning_rate": 0.0006100144439094848, "loss": 0.0716, "theoretical_loss": 3.5880655509606534, "tokens_seen": 1307049984 }, { "epoch": 0.4, "learning_rate": 0.0006099341999679025, "loss": 0.0734, "theoretical_loss": 3.5880009390023324, "tokens_seen": 1307312128 }, { "epoch": 0.4, "learning_rate": 0.00060985395602632, "loss": 0.0694, "theoretical_loss": 3.5879363436256626, "tokens_seen": 1307574272 }, { "epoch": 0.4, "learning_rate": 0.0006097737120847376, "loss": 0.0722, "theoretical_loss": 3.587871764823066, "tokens_seen": 1307836416 }, { "epoch": 0.4, "learning_rate": 0.0006096934681431552, "loss": 0.073, "theoretical_loss": 3.5878072025869683, "tokens_seen": 1308098560 }, { "epoch": 0.4, "learning_rate": 0.0006096132242015728, "loss": 0.0697, "theoretical_loss": 3.5877426569098017, "tokens_seen": 1308360704 }, { "epoch": 0.4, "learning_rate": 0.0006095329802599903, "loss": 0.069, "theoretical_loss": 3.5876781277840024, "tokens_seen": 1308622848 }, { "epoch": 0.4, "learning_rate": 0.000609452736318408, "loss": 0.0712, "theoretical_loss": 3.5876136152020117, "tokens_seen": 1308884992 }, { "epoch": 0.4, "learning_rate": 0.0006093724923768256, "loss": 0.0715, "theoretical_loss": 3.587549119156276, "tokens_seen": 1309147136 }, { "epoch": 0.4, "learning_rate": 0.0006092922484352431, "loss": 0.0747, "theoretical_loss": 3.5874846396392472, "tokens_seen": 1309409280 }, { "epoch": 0.4, "learning_rate": 0.0006092120044936608, "loss": 0.0718, "theoretical_loss": 3.5874201766433815, "tokens_seen": 1309671424 }, { "epoch": 0.4, "learning_rate": 0.0006091317605520783, "loss": 0.0729, "theoretical_loss": 3.58735573016114, "tokens_seen": 1309933568 }, { "epoch": 0.4, "learning_rate": 0.0006090515166104959, "loss": 0.0722, "theoretical_loss": 3.5872913001849884, "tokens_seen": 1310195712 }, { "epoch": 0.4, "learning_rate": 0.0006089712726689135, "loss": 0.0694, "theoretical_loss": 3.5872268867073993, "tokens_seen": 1310457856 }, { "debugging/Compilability": 0.9473684210526315, "debugging/distinct-1-grams": 0.7438439469491407, "debugging/entropy-1-grams": 5.528104507223894, "debugging/length": 493.7894736842105, "debugging/num_segments": 19, "debugging/raw_token_scores_avg": 0.008907916024327278, "debugging/raw_token_scores_std": 0.03160863742232323, "debugging/score": 0.005095041850402146, "debugging/score_std": 0.005375833718357644, "epoch": 0.4, "objective/train/advantage_avg": 0.0013337053824216127, "objective/train/docs_used": 479375, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4477362632751465, "objective/train/original_loss": 1.4477365016937256, "objective/train/theoretical_loss": 3.587162489720847, "objective/train/tokens_used": 1331180000, "objective/train/value_avg": -0.0102386474609375, "objective/train/value_loss": 0.00042610923992469907, "objective/train/value_max": -0.00011235475540161133, "objective/train/value_min": -0.8984375, "objective/train/value_reward_corr": 0.7585738573996782, "objective/train/value_std": 0.0240631103515625, "objective/train/weight_avg": 1.0015311241149902, "objective/train/weighted_lm_loss": 1.4506300687789917, "objective/train/weights_max": 1.8746426105499268, "objective/train/weights_min": 0.3877526521682739, "theoretical_loss": 3.587162489720847, "tokens_seen": 1310720000 }, { "epoch": 0.4, "learning_rate": 0.0006088910287273311, "loss": 0.0695, "theoretical_loss": 3.587162489720847, "tokens_seen": 1310720000 }, { "epoch": 0.4, "learning_rate": 0.0006088107847857486, "loss": 0.0699, "theoretical_loss": 3.5870981092178136, "tokens_seen": 1310982144 }, { "epoch": 0.4, "learning_rate": 0.0006087305408441663, "loss": 0.0719, "theoretical_loss": 3.5870337451907854, "tokens_seen": 1311244288 }, { "epoch": 0.4, "learning_rate": 0.0006086502969025839, "loss": 0.0723, "theoretical_loss": 3.5869693976322523, "tokens_seen": 1311506432 }, { "epoch": 0.4, "learning_rate": 0.0006085700529610015, "loss": 0.0676, "theoretical_loss": 3.586905066534711, "tokens_seen": 1311768576 }, { "epoch": 0.4, "learning_rate": 0.0006084898090194191, "loss": 0.0664, "theoretical_loss": 3.5868407518906618, "tokens_seen": 1312030720 }, { "epoch": 0.4, "learning_rate": 0.0006084095650778366, "loss": 0.0729, "theoretical_loss": 3.586776453692611, "tokens_seen": 1312292864 }, { "epoch": 0.4, "learning_rate": 0.0006083293211362543, "loss": 0.0704, "theoretical_loss": 3.5867121719330677, "tokens_seen": 1312555008 }, { "epoch": 0.4, "learning_rate": 0.0006082490771946718, "loss": 0.0711, "theoretical_loss": 3.586647906604549, "tokens_seen": 1312817152 }, { "epoch": 0.4, "learning_rate": 0.0006081688332530893, "loss": 0.0709, "theoretical_loss": 3.5865836576995744, "tokens_seen": 1313079296 }, { "epoch": 0.4, "learning_rate": 0.000608088589311507, "loss": 0.0707, "theoretical_loss": 3.5865194252106694, "tokens_seen": 1313341440 }, { "epoch": 0.4, "learning_rate": 0.0006080083453699246, "loss": 0.0727, "theoretical_loss": 3.586455209130364, "tokens_seen": 1313603584 }, { "epoch": 0.4, "learning_rate": 0.0006079281014283422, "loss": 0.0739, "theoretical_loss": 3.5863910094511935, "tokens_seen": 1313865728 }, { "epoch": 0.4, "learning_rate": 0.0006078478574867598, "loss": 0.0712, "theoretical_loss": 3.586326826165698, "tokens_seen": 1314127872 }, { "epoch": 0.4, "learning_rate": 0.0006077676135451774, "loss": 0.0713, "theoretical_loss": 3.5862626592664215, "tokens_seen": 1314390016 }, { "epoch": 0.4, "learning_rate": 0.0006076873696035949, "loss": 0.0703, "theoretical_loss": 3.586198508745915, "tokens_seen": 1314652160 }, { "epoch": 0.4, "learning_rate": 0.0006076071256620125, "loss": 0.0705, "theoretical_loss": 3.586134374596732, "tokens_seen": 1314914304 }, { "epoch": 0.4, "learning_rate": 0.0006075268817204301, "loss": 0.0721, "theoretical_loss": 3.586070256811432, "tokens_seen": 1315176448 }, { "epoch": 0.4, "learning_rate": 0.0006074466377788477, "loss": 0.069, "theoretical_loss": 3.58600615538258, "tokens_seen": 1315438592 }, { "epoch": 0.4, "learning_rate": 0.0006073663938372653, "loss": 0.0738, "theoretical_loss": 3.5859420703027447, "tokens_seen": 1315700736 }, { "epoch": 0.4, "learning_rate": 0.0006072861498956828, "loss": 0.0701, "theoretical_loss": 3.5858780015644998, "tokens_seen": 1315962880 }, { "epoch": 0.4, "learning_rate": 0.0006072059059541006, "loss": 0.0751, "theoretical_loss": 3.585813949160425, "tokens_seen": 1316225024 }, { "epoch": 0.4, "learning_rate": 0.0006071256620125181, "loss": 0.0699, "theoretical_loss": 3.585749913083103, "tokens_seen": 1316487168 }, { "epoch": 0.4, "learning_rate": 0.0006070454180709356, "loss": 0.0754, "theoretical_loss": 3.5856858933251234, "tokens_seen": 1316749312 }, { "epoch": 0.4, "learning_rate": 0.0006069651741293533, "loss": 0.0727, "theoretical_loss": 3.5856218898790786, "tokens_seen": 1317011456 }, { "epoch": 0.4, "objective/train/advantage_avg": -0.0005380894290283322, "objective/train/docs_used": 481887, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4740736484527588, "objective/train/original_loss": 1.4740736484527588, "objective/train/theoretical_loss": 3.585557902737568, "objective/train/tokens_used": 1337733600, "objective/train/value_avg": -0.009063720703125, "objective/train/value_loss": 0.00029063946567475796, "objective/train/value_max": -6.014108657836914e-05, "objective/train/value_min": -0.982421875, "objective/train/value_reward_corr": 0.906233165609036, "objective/train/value_std": 0.030059814453125, "objective/train/weight_avg": 0.9995934963226318, "objective/train/weighted_lm_loss": 1.4738991260528564, "objective/train/weights_max": 1.3234916925430298, "objective/train/weights_min": 0.37601205706596375, "theoretical_loss": 3.585557902737568, "tokens_seen": 1317273600 }, { "epoch": 0.4, "learning_rate": 0.0006068849301877708, "loss": 0.0742, "theoretical_loss": 3.585557902737568, "tokens_seen": 1317273600 }, { "epoch": 0.4, "learning_rate": 0.0006068046862461884, "loss": 0.077, "theoretical_loss": 3.5854939318931933, "tokens_seen": 1317535744 }, { "epoch": 0.4, "learning_rate": 0.000606724442304606, "loss": 0.0721, "theoretical_loss": 3.5854299773385634, "tokens_seen": 1317797888 }, { "epoch": 0.4, "learning_rate": 0.0006066441983630236, "loss": 0.0726, "theoretical_loss": 3.585366039066291, "tokens_seen": 1318060032 }, { "epoch": 0.4, "learning_rate": 0.0006065639544214411, "loss": 0.0716, "theoretical_loss": 3.585302117068993, "tokens_seen": 1318322176 }, { "epoch": 0.4, "learning_rate": 0.0006064837104798588, "loss": 0.072, "theoretical_loss": 3.585238211339292, "tokens_seen": 1318584320 }, { "epoch": 0.4, "learning_rate": 0.0006064034665382764, "loss": 0.0703, "theoretical_loss": 3.5851743218698156, "tokens_seen": 1318846464 }, { "epoch": 0.4, "learning_rate": 0.0006063232225966939, "loss": 0.0713, "theoretical_loss": 3.585110448653195, "tokens_seen": 1319108608 }, { "epoch": 0.4, "learning_rate": 0.0006062429786551116, "loss": 0.0699, "theoretical_loss": 3.585046591682068, "tokens_seen": 1319370752 }, { "epoch": 0.4, "learning_rate": 0.0006061627347135291, "loss": 0.0718, "theoretical_loss": 3.5849827509490746, "tokens_seen": 1319632896 }, { "epoch": 0.4, "learning_rate": 0.0006060824907719468, "loss": 0.0718, "theoretical_loss": 3.584918926446863, "tokens_seen": 1319895040 }, { "epoch": 0.4, "learning_rate": 0.0006060022468303643, "loss": 0.0706, "theoretical_loss": 3.5848551181680826, "tokens_seen": 1320157184 }, { "epoch": 0.4, "learning_rate": 0.0006059220028887819, "loss": 0.0678, "theoretical_loss": 3.5847913261053908, "tokens_seen": 1320419328 }, { "epoch": 0.4, "learning_rate": 0.0006058417589471995, "loss": 0.0719, "theoretical_loss": 3.584727550251447, "tokens_seen": 1320681472 }, { "epoch": 0.4, "learning_rate": 0.000605761515005617, "loss": 0.0733, "theoretical_loss": 3.5846637905989183, "tokens_seen": 1320943616 }, { "epoch": 0.4, "learning_rate": 0.0006056812710640347, "loss": 0.0709, "theoretical_loss": 3.5846000471404738, "tokens_seen": 1321205760 }, { "epoch": 0.4, "learning_rate": 0.0006056010271224523, "loss": 0.0763, "theoretical_loss": 3.5845363198687883, "tokens_seen": 1321467904 }, { "epoch": 0.4, "learning_rate": 0.0006055207831808699, "loss": 0.0726, "theoretical_loss": 3.584472608776542, "tokens_seen": 1321730048 }, { "epoch": 0.4, "learning_rate": 0.0006054405392392874, "loss": 0.0732, "theoretical_loss": 3.5844089138564197, "tokens_seen": 1321992192 }, { "epoch": 0.4, "learning_rate": 0.0006053602952977051, "loss": 0.0693, "theoretical_loss": 3.584345235101111, "tokens_seen": 1322254336 }, { "epoch": 0.4, "learning_rate": 0.0006052800513561226, "loss": 0.0709, "theoretical_loss": 3.584281572503309, "tokens_seen": 1322516480 }, { "epoch": 0.4, "learning_rate": 0.0006051998074145401, "loss": 0.0728, "theoretical_loss": 3.584217926055713, "tokens_seen": 1322778624 }, { "epoch": 0.4, "learning_rate": 0.0006051195634729578, "loss": 0.0734, "theoretical_loss": 3.584154295751027, "tokens_seen": 1323040768 }, { "epoch": 0.4, "learning_rate": 0.0006050393195313753, "loss": 0.0718, "theoretical_loss": 3.5840906815819586, "tokens_seen": 1323302912 }, { "epoch": 0.4, "learning_rate": 0.0006049590755897931, "loss": 0.0726, "theoretical_loss": 3.584027083541222, "tokens_seen": 1323565056 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.00013670318003278226, "objective/train/docs_used": 484332, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4303979873657227, "objective/train/original_loss": 1.4303977489471436, "objective/train/theoretical_loss": 3.583963501621533, "objective/train/tokens_used": 1344287200, "objective/train/value_avg": -0.00768280029296875, "objective/train/value_loss": 0.0003027576894965023, "objective/train/value_max": -9.244680404663086e-05, "objective/train/value_min": -0.53466796875, "objective/train/value_reward_corr": 0.6955805663532895, "objective/train/value_std": 0.0156707763671875, "objective/train/weight_avg": 1.0002702474594116, "objective/train/weighted_lm_loss": 1.4310005903244019, "objective/train/weights_max": 1.3974761962890625, "objective/train/weights_min": 0.3838087022304535, "theoretical_loss": 3.583963501621533, "tokens_seen": 1323827200 }, { "epoch": 0.4, "learning_rate": 0.0006048788316482106, "loss": 0.0721, "theoretical_loss": 3.583963501621533, "tokens_seen": 1323827200 }, { "epoch": 0.4, "learning_rate": 0.0006047985877066282, "loss": 0.0715, "theoretical_loss": 3.583899935815616, "tokens_seen": 1324089344 }, { "epoch": 0.4, "learning_rate": 0.0006047183437650458, "loss": 0.0715, "theoretical_loss": 3.583836386116197, "tokens_seen": 1324351488 }, { "epoch": 0.4, "learning_rate": 0.0006046380998234633, "loss": 0.072, "theoretical_loss": 3.5837728525160086, "tokens_seen": 1324613632 }, { "epoch": 0.4, "learning_rate": 0.0006045578558818809, "loss": 0.0765, "theoretical_loss": 3.5837093350077875, "tokens_seen": 1324875776 }, { "epoch": 0.4, "learning_rate": 0.0006044776119402985, "loss": 0.0711, "theoretical_loss": 3.5836458335842747, "tokens_seen": 1325137920 }, { "epoch": 0.4, "learning_rate": 0.0006043973679987161, "loss": 0.0728, "theoretical_loss": 3.5835823482382163, "tokens_seen": 1325400064 }, { "epoch": 0.4, "learning_rate": 0.0006043171240571336, "loss": 0.0712, "theoretical_loss": 3.583518878962364, "tokens_seen": 1325662208 }, { "epoch": 0.4, "learning_rate": 0.0006042368801155514, "loss": 0.0708, "theoretical_loss": 3.583455425749472, "tokens_seen": 1325924352 }, { "epoch": 0.4, "learning_rate": 0.0006041566361739689, "loss": 0.0725, "theoretical_loss": 3.583391988592301, "tokens_seen": 1326186496 }, { "epoch": 0.4, "learning_rate": 0.0006040763922323864, "loss": 0.0747, "theoretical_loss": 3.5833285674836164, "tokens_seen": 1326448640 }, { "epoch": 0.4, "learning_rate": 0.0006039961482908041, "loss": 0.0724, "theoretical_loss": 3.583265162416187, "tokens_seen": 1326710784 }, { "epoch": 0.4, "learning_rate": 0.0006039159043492216, "loss": 0.0722, "theoretical_loss": 3.583201773382788, "tokens_seen": 1326972928 }, { "epoch": 0.4, "learning_rate": 0.0006038356604076392, "loss": 0.0731, "theoretical_loss": 3.583138400376197, "tokens_seen": 1327235072 }, { "epoch": 0.4, "learning_rate": 0.0006037554164660568, "loss": 0.071, "theoretical_loss": 3.583075043389199, "tokens_seen": 1327497216 }, { "epoch": 0.4, "learning_rate": 0.0006036751725244744, "loss": 0.0729, "theoretical_loss": 3.583011702414581, "tokens_seen": 1327759360 }, { "epoch": 0.4, "learning_rate": 0.000603594928582892, "loss": 0.0736, "theoretical_loss": 3.5829483774451374, "tokens_seen": 1328021504 }, { "epoch": 0.4, "learning_rate": 0.0006035146846413096, "loss": 0.0711, "theoretical_loss": 3.582885068473665, "tokens_seen": 1328283648 }, { "epoch": 0.4, "learning_rate": 0.0006034344406997272, "loss": 0.0724, "theoretical_loss": 3.582821775492966, "tokens_seen": 1328545792 }, { "epoch": 0.4, "learning_rate": 0.0006033541967581448, "loss": 0.0712, "theoretical_loss": 3.5827584984958474, "tokens_seen": 1328807936 }, { "epoch": 0.4, "learning_rate": 0.0006032739528165624, "loss": 0.0733, "theoretical_loss": 3.582695237475121, "tokens_seen": 1329070080 }, { "epoch": 0.4, "learning_rate": 0.0006031937088749799, "loss": 0.0711, "theoretical_loss": 3.582631992423603, "tokens_seen": 1329332224 }, { "epoch": 0.4, "learning_rate": 0.0006031134649333976, "loss": 0.0709, "theoretical_loss": 3.582568763334115, "tokens_seen": 1329594368 }, { "epoch": 0.4, "learning_rate": 0.0006030332209918151, "loss": 0.0682, "theoretical_loss": 3.582505550199481, "tokens_seen": 1329856512 }, { "epoch": 0.4, "learning_rate": 0.0006029529770502326, "loss": 0.0706, "theoretical_loss": 3.5824423530125324, "tokens_seen": 1330118656 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.0006204114179126918, "objective/train/docs_used": 486514, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3864256143569946, "objective/train/original_loss": 1.386425495147705, "objective/train/theoretical_loss": 3.582379171766104, "objective/train/tokens_used": 1350840800, "objective/train/value_avg": -0.00786590576171875, "objective/train/value_loss": 0.00019436066213529557, "objective/train/value_max": -8.219480514526367e-05, "objective/train/value_min": -0.68359375, "objective/train/value_reward_corr": 0.6674128947141043, "objective/train/value_std": 0.0129241943359375, "objective/train/weight_avg": 1.0007107257843018, "objective/train/weighted_lm_loss": 1.3870501518249512, "objective/train/weights_max": 1.2174729108810425, "objective/train/weights_min": 0.3684675693511963, "theoretical_loss": 3.582379171766104, "tokens_seen": 1330380800 }, { "epoch": 0.4, "learning_rate": 0.0006028727331086503, "loss": 0.0716, "theoretical_loss": 3.582379171766104, "tokens_seen": 1330380800 }, { "epoch": 0.4, "learning_rate": 0.0006027924891670679, "loss": 0.0721, "theoretical_loss": 3.582316006453034, "tokens_seen": 1330642944 }, { "epoch": 0.4, "learning_rate": 0.0006027122452254855, "loss": 0.072, "theoretical_loss": 3.5822528570661683, "tokens_seen": 1330905088 }, { "epoch": 0.4, "learning_rate": 0.0006026320012839031, "loss": 0.073, "theoretical_loss": 3.582189723598354, "tokens_seen": 1331167232 }, { "epoch": 0.4, "learning_rate": 0.0006025517573423207, "loss": 0.0721, "theoretical_loss": 3.582126606042446, "tokens_seen": 1331429376 }, { "epoch": 0.4, "learning_rate": 0.0006024715134007382, "loss": 0.0719, "theoretical_loss": 3.5820635043913005, "tokens_seen": 1331691520 }, { "epoch": 0.4, "learning_rate": 0.0006023912694591559, "loss": 0.073, "theoretical_loss": 3.582000418637781, "tokens_seen": 1331953664 }, { "epoch": 0.4, "learning_rate": 0.0006023110255175734, "loss": 0.0686, "theoretical_loss": 3.581937348774755, "tokens_seen": 1332215808 }, { "epoch": 0.4, "learning_rate": 0.000602230781575991, "loss": 0.0723, "theoretical_loss": 3.5818742947950932, "tokens_seen": 1332477952 }, { "epoch": 0.4, "learning_rate": 0.0006021505376344086, "loss": 0.0726, "theoretical_loss": 3.5818112566916724, "tokens_seen": 1332740096 }, { "epoch": 0.4, "learning_rate": 0.0006020702936928261, "loss": 0.0708, "theoretical_loss": 3.5817482344573746, "tokens_seen": 1333002240 }, { "epoch": 0.4, "learning_rate": 0.0006019900497512439, "loss": 0.0727, "theoretical_loss": 3.5816852280850835, "tokens_seen": 1333264384 }, { "epoch": 0.4, "learning_rate": 0.0006019098058096614, "loss": 0.0704, "theoretical_loss": 3.5816222375676903, "tokens_seen": 1333526528 }, { "epoch": 0.4, "learning_rate": 0.000601829561868079, "loss": 0.0721, "theoretical_loss": 3.58155926289809, "tokens_seen": 1333788672 }, { "epoch": 0.4, "learning_rate": 0.0006017493179264966, "loss": 0.0687, "theoretical_loss": 3.581496304069181, "tokens_seen": 1334050816 }, { "epoch": 0.4, "learning_rate": 0.0006016690739849141, "loss": 0.0735, "theoretical_loss": 3.5814333610738673, "tokens_seen": 1334312960 }, { "epoch": 0.4, "learning_rate": 0.0006015888300433317, "loss": 0.0718, "theoretical_loss": 3.5813704339050583, "tokens_seen": 1334575104 }, { "epoch": 0.4, "learning_rate": 0.0006015085861017493, "loss": 0.0717, "theoretical_loss": 3.581307522555666, "tokens_seen": 1334837248 }, { "epoch": 0.4, "learning_rate": 0.0006014283421601669, "loss": 0.0693, "theoretical_loss": 3.5812446270186085, "tokens_seen": 1335099392 }, { "epoch": 0.4, "learning_rate": 0.0006013480982185844, "loss": 0.0709, "theoretical_loss": 3.5811817472868075, "tokens_seen": 1335361536 }, { "epoch": 0.4, "learning_rate": 0.0006012678542770022, "loss": 0.0703, "theoretical_loss": 3.5811188833531897, "tokens_seen": 1335623680 }, { "epoch": 0.4, "learning_rate": 0.0006011876103354197, "loss": 0.0681, "theoretical_loss": 3.5810560352106866, "tokens_seen": 1335885824 }, { "epoch": 0.4, "learning_rate": 0.0006011073663938372, "loss": 0.0685, "theoretical_loss": 3.580993202852234, "tokens_seen": 1336147968 }, { "epoch": 0.4, "learning_rate": 0.0006010271224522549, "loss": 0.0675, "theoretical_loss": 3.580930386270772, "tokens_seen": 1336410112 }, { "epoch": 0.41, "learning_rate": 0.0006009468785106724, "loss": 0.0716, "theoretical_loss": 3.5808675854592464, "tokens_seen": 1336672256 }, { "epoch": 0.41, "objective/train/advantage_avg": -0.00022589701984543353, "objective/train/docs_used": 488954, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3641271591186523, "objective/train/original_loss": 1.3641269207000732, "objective/train/theoretical_loss": 3.5808048004106054, "objective/train/tokens_used": 1357394400, "objective/train/value_avg": -0.006866455078125, "objective/train/value_loss": 0.00021092750830575824, "objective/train/value_max": -9.608268737792969e-05, "objective/train/value_min": -0.333984375, "objective/train/value_reward_corr": 0.6062614211122952, "objective/train/value_std": 0.0108489990234375, "objective/train/weight_avg": 0.9998708367347717, "objective/train/weighted_lm_loss": 1.3648322820663452, "objective/train/weights_max": 1.2626718282699585, "objective/train/weights_min": 0.3777256906032562, "theoretical_loss": 3.5808048004106054, "tokens_seen": 1336934400 }, { "epoch": 0.41, "learning_rate": 0.0006008666345690901, "loss": 0.0717, "theoretical_loss": 3.5808048004106054, "tokens_seen": 1336934400 }, { "epoch": 0.41, "learning_rate": 0.0006007863906275076, "loss": 0.071, "theoretical_loss": 3.5807420311178033, "tokens_seen": 1337196544 }, { "epoch": 0.41, "learning_rate": 0.0006007061466859252, "loss": 0.071, "theoretical_loss": 3.580679277573799, "tokens_seen": 1337458688 }, { "epoch": 0.41, "learning_rate": 0.0006006259027443429, "loss": 0.0713, "theoretical_loss": 3.5806165397715546, "tokens_seen": 1337720832 }, { "epoch": 0.41, "learning_rate": 0.0006005456588027604, "loss": 0.0719, "theoretical_loss": 3.580553817704039, "tokens_seen": 1337982976 }, { "epoch": 0.41, "learning_rate": 0.000600465414861178, "loss": 0.0697, "theoretical_loss": 3.580491111364223, "tokens_seen": 1338245120 }, { "epoch": 0.41, "learning_rate": 0.0006003851709195956, "loss": 0.0693, "theoretical_loss": 3.5804284207450836, "tokens_seen": 1338507264 }, { "epoch": 0.41, "learning_rate": 0.0006003049269780132, "loss": 0.0682, "theoretical_loss": 3.580365745839602, "tokens_seen": 1338769408 }, { "epoch": 0.41, "learning_rate": 0.0006002246830364307, "loss": 0.0719, "theoretical_loss": 3.5803030866407637, "tokens_seen": 1339031552 }, { "epoch": 0.41, "learning_rate": 0.0006001444390948484, "loss": 0.0701, "theoretical_loss": 3.580240443141559, "tokens_seen": 1339293696 }, { "epoch": 0.41, "learning_rate": 0.0006000641951532659, "loss": 0.0716, "theoretical_loss": 3.5801778153349817, "tokens_seen": 1339555840 }, { "epoch": 0.41, "learning_rate": 0.0005999839512116834, "loss": 0.0722, "theoretical_loss": 3.5801152032140315, "tokens_seen": 1339817984 }, { "epoch": 0.41, "learning_rate": 0.0005999037072701011, "loss": 0.0698, "theoretical_loss": 3.580052606771712, "tokens_seen": 1340080128 }, { "epoch": 0.41, "learning_rate": 0.0005998234633285187, "loss": 0.0697, "theoretical_loss": 3.579990026001031, "tokens_seen": 1340342272 }, { "epoch": 0.41, "learning_rate": 0.0005997432193869364, "loss": 0.072, "theoretical_loss": 3.579927460895002, "tokens_seen": 1340604416 }, { "epoch": 0.41, "learning_rate": 0.0005996629754453539, "loss": 0.0739, "theoretical_loss": 3.5798649114466405, "tokens_seen": 1340866560 }, { "epoch": 0.41, "learning_rate": 0.0005995827315037715, "loss": 0.0703, "theoretical_loss": 3.579802377648969, "tokens_seen": 1341128704 }, { "epoch": 0.41, "learning_rate": 0.0005995024875621891, "loss": 0.0724, "theoretical_loss": 3.579739859495013, "tokens_seen": 1341390848 }, { "epoch": 0.41, "learning_rate": 0.0005994222436206067, "loss": 0.072, "theoretical_loss": 3.5796773569778026, "tokens_seen": 1341652992 }, { "epoch": 0.41, "learning_rate": 0.0005993419996790242, "loss": 0.0713, "theoretical_loss": 3.579614870090374, "tokens_seen": 1341915136 }, { "epoch": 0.41, "learning_rate": 0.0005992617557374418, "loss": 0.0749, "theoretical_loss": 3.5795523988257654, "tokens_seen": 1342177280 }, { "epoch": 0.41, "learning_rate": 0.0005991815117958594, "loss": 0.072, "theoretical_loss": 3.5794899431770215, "tokens_seen": 1342439424 }, { "epoch": 0.41, "learning_rate": 0.000599101267854277, "loss": 0.0733, "theoretical_loss": 3.5794275031371896, "tokens_seen": 1342701568 }, { "epoch": 0.41, "learning_rate": 0.0005990210239126947, "loss": 0.0694, "theoretical_loss": 3.579365078699323, "tokens_seen": 1342963712 }, { "epoch": 0.41, "learning_rate": 0.0005989407799711122, "loss": 0.072, "theoretical_loss": 3.579302669856479, "tokens_seen": 1343225856 }, { "epoch": 0.41, "objective/train/advantage_avg": -2.97992642117606e-06, "objective/train/docs_used": 490931, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4495521783828735, "objective/train/original_loss": 1.449552059173584, "objective/train/theoretical_loss": 3.5792402766017197, "objective/train/tokens_used": 1363948000, "objective/train/value_avg": -0.00823211669921875, "objective/train/value_loss": 0.0004934009630233049, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.64599609375, "objective/train/value_reward_corr": 0.603769803863347, "objective/train/value_std": 0.01506805419921875, "objective/train/weight_avg": 1.000206470489502, "objective/train/weighted_lm_loss": 1.4500666856765747, "objective/train/weights_max": 1.8629971742630005, "objective/train/weights_min": 0.3684026002883911, "theoretical_loss": 3.5792402766017197, "tokens_seen": 1343488000 }, { "epoch": 0.41, "learning_rate": 0.0005988605360295298, "loss": 0.0703, "theoretical_loss": 3.5792402766017197, "tokens_seen": 1343488000 }, { "epoch": 0.41, "learning_rate": 0.0005987802920879474, "loss": 0.0692, "theoretical_loss": 3.57917789892811, "tokens_seen": 1343750144 }, { "epoch": 0.41, "learning_rate": 0.0005987000481463649, "loss": 0.0691, "theoretical_loss": 3.579115536828721, "tokens_seen": 1344012288 }, { "epoch": 0.41, "learning_rate": 0.0005986198042047825, "loss": 0.07, "theoretical_loss": 3.5790531902966274, "tokens_seen": 1344274432 }, { "epoch": 0.41, "learning_rate": 0.0005985395602632001, "loss": 0.0698, "theoretical_loss": 3.578990859324909, "tokens_seen": 1344536576 }, { "epoch": 0.41, "learning_rate": 0.0005984593163216177, "loss": 0.0697, "theoretical_loss": 3.5789285439066494, "tokens_seen": 1344798720 }, { "epoch": 0.41, "learning_rate": 0.0005983790723800354, "loss": 0.0701, "theoretical_loss": 3.578866244034937, "tokens_seen": 1345060864 }, { "epoch": 0.41, "learning_rate": 0.000598298828438453, "loss": 0.0733, "theoretical_loss": 3.5788039597028636, "tokens_seen": 1345323008 }, { "epoch": 0.41, "learning_rate": 0.0005982185844968705, "loss": 0.0677, "theoretical_loss": 3.5787416909035272, "tokens_seen": 1345585152 }, { "epoch": 0.41, "learning_rate": 0.0005981383405552881, "loss": 0.0707, "theoretical_loss": 3.578679437630029, "tokens_seen": 1345847296 }, { "epoch": 0.41, "learning_rate": 0.0005980580966137057, "loss": 0.072, "theoretical_loss": 3.5786171998754748, "tokens_seen": 1346109440 }, { "epoch": 0.41, "learning_rate": 0.0005979778526721232, "loss": 0.0697, "theoretical_loss": 3.5785549776329746, "tokens_seen": 1346371584 }, { "epoch": 0.41, "learning_rate": 0.0005978976087305409, "loss": 0.0695, "theoretical_loss": 3.578492770895643, "tokens_seen": 1346633728 }, { "epoch": 0.41, "learning_rate": 0.0005978173647889584, "loss": 0.0674, "theoretical_loss": 3.5784305796566, "tokens_seen": 1346895872 }, { "epoch": 0.41, "learning_rate": 0.000597737120847376, "loss": 0.0706, "theoretical_loss": 3.5783684039089687, "tokens_seen": 1347158016 }, { "epoch": 0.41, "learning_rate": 0.0005976568769057937, "loss": 0.0689, "theoretical_loss": 3.578306243645876, "tokens_seen": 1347420160 }, { "epoch": 0.41, "learning_rate": 0.0005975766329642112, "loss": 0.0684, "theoretical_loss": 3.5782440988604547, "tokens_seen": 1347682304 }, { "epoch": 0.41, "learning_rate": 0.0005974963890226288, "loss": 0.0699, "theoretical_loss": 3.5781819695458417, "tokens_seen": 1347944448 }, { "epoch": 0.41, "learning_rate": 0.0005974161450810464, "loss": 0.0717, "theoretical_loss": 3.578119855695178, "tokens_seen": 1348206592 }, { "epoch": 0.41, "learning_rate": 0.000597335901139464, "loss": 0.0701, "theoretical_loss": 3.5780577573016084, "tokens_seen": 1348468736 }, { "epoch": 0.41, "learning_rate": 0.0005972556571978816, "loss": 0.0726, "theoretical_loss": 3.5779956743582835, "tokens_seen": 1348730880 }, { "epoch": 0.41, "learning_rate": 0.0005971754132562992, "loss": 0.0705, "theoretical_loss": 3.5779336068583563, "tokens_seen": 1348993024 }, { "epoch": 0.41, "learning_rate": 0.0005970951693147167, "loss": 0.0716, "theoretical_loss": 3.577871554794986, "tokens_seen": 1349255168 }, { "epoch": 0.41, "learning_rate": 0.0005970149253731343, "loss": 0.0678, "theoretical_loss": 3.5778095181613354, "tokens_seen": 1349517312 }, { "epoch": 0.41, "learning_rate": 0.000596934681431552, "loss": 0.0713, "theoretical_loss": 3.577747496950572, "tokens_seen": 1349779456 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0013337216805666685, "objective/train/docs_used": 493399, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.339576244354248, "objective/train/original_loss": 1.3395761251449585, "objective/train/theoretical_loss": 3.577685491155866, "objective/train/tokens_used": 1370501600, "objective/train/value_avg": -0.00826263427734375, "objective/train/value_loss": 0.00026566104497760534, "objective/train/value_max": -0.0001252889633178711, "objective/train/value_min": -0.6865234375, "objective/train/value_reward_corr": 0.5799900901845774, "objective/train/value_std": 0.01233673095703125, "objective/train/weight_avg": 1.0014503002166748, "objective/train/weighted_lm_loss": 1.341780185699463, "objective/train/weights_max": 1.6780211925506592, "objective/train/weights_min": 0.3697971701622009, "theoretical_loss": 3.577685491155866, "tokens_seen": 1350041600 }, { "epoch": 0.41, "learning_rate": 0.0005968544374899695, "loss": 0.0717, "theoretical_loss": 3.577685491155866, "tokens_seen": 1350041600 }, { "epoch": 0.41, "learning_rate": 0.0005967741935483872, "loss": 0.0714, "theoretical_loss": 3.577623500770394, "tokens_seen": 1350303744 }, { "epoch": 0.41, "learning_rate": 0.0005966939496068047, "loss": 0.0704, "theoretical_loss": 3.577561525787337, "tokens_seen": 1350565888 }, { "epoch": 0.41, "learning_rate": 0.0005966137056652223, "loss": 0.0696, "theoretical_loss": 3.5774995661998785, "tokens_seen": 1350828032 }, { "epoch": 0.41, "learning_rate": 0.0005965334617236399, "loss": 0.0701, "theoretical_loss": 3.5774376220012085, "tokens_seen": 1351090176 }, { "epoch": 0.41, "learning_rate": 0.0005964532177820575, "loss": 0.0732, "theoretical_loss": 3.5773756931845186, "tokens_seen": 1351352320 }, { "epoch": 0.41, "learning_rate": 0.000596372973840475, "loss": 0.071, "theoretical_loss": 3.5773137797430077, "tokens_seen": 1351614464 }, { "epoch": 0.41, "learning_rate": 0.0005962927298988926, "loss": 0.0719, "theoretical_loss": 3.577251881669877, "tokens_seen": 1351876608 }, { "epoch": 0.41, "learning_rate": 0.0005962124859573102, "loss": 0.0755, "theoretical_loss": 3.5771899989583336, "tokens_seen": 1352138752 }, { "epoch": 0.41, "learning_rate": 0.0005961322420157277, "loss": 0.0694, "theoretical_loss": 3.577128131601587, "tokens_seen": 1352400896 }, { "epoch": 0.41, "learning_rate": 0.0005960519980741455, "loss": 0.0678, "theoretical_loss": 3.5770662795928527, "tokens_seen": 1352663040 }, { "epoch": 0.41, "learning_rate": 0.000595971754132563, "loss": 0.0681, "theoretical_loss": 3.5770044429253494, "tokens_seen": 1352925184 }, { "epoch": 0.41, "learning_rate": 0.0005958915101909807, "loss": 0.0701, "theoretical_loss": 3.576942621592301, "tokens_seen": 1353187328 }, { "epoch": 0.41, "learning_rate": 0.0005958112662493982, "loss": 0.0709, "theoretical_loss": 3.576880815586935, "tokens_seen": 1353449472 }, { "epoch": 0.41, "learning_rate": 0.0005957310223078157, "loss": 0.0713, "theoretical_loss": 3.576819024902483, "tokens_seen": 1353711616 }, { "epoch": 0.41, "learning_rate": 0.0005956507783662334, "loss": 0.0739, "theoretical_loss": 3.576757249532183, "tokens_seen": 1353973760 }, { "epoch": 0.41, "learning_rate": 0.0005955705344246509, "loss": 0.0697, "theoretical_loss": 3.576695489469274, "tokens_seen": 1354235904 }, { "epoch": 0.41, "learning_rate": 0.0005954902904830685, "loss": 0.0713, "theoretical_loss": 3.5766337447070016, "tokens_seen": 1354498048 }, { "epoch": 0.41, "learning_rate": 0.0005954100465414862, "loss": 0.0706, "theoretical_loss": 3.5765720152386153, "tokens_seen": 1354760192 }, { "epoch": 0.41, "learning_rate": 0.0005953298025999038, "loss": 0.0706, "theoretical_loss": 3.5765103010573682, "tokens_seen": 1355022336 }, { "epoch": 0.41, "learning_rate": 0.0005952495586583213, "loss": 0.071, "theoretical_loss": 3.576448602156518, "tokens_seen": 1355284480 }, { "epoch": 0.41, "learning_rate": 0.0005951693147167389, "loss": 0.0693, "theoretical_loss": 3.5763869185293276, "tokens_seen": 1355546624 }, { "epoch": 0.41, "learning_rate": 0.0005950890707751565, "loss": 0.0729, "theoretical_loss": 3.576325250169062, "tokens_seen": 1355808768 }, { "epoch": 0.41, "learning_rate": 0.000595008826833574, "loss": 0.0696, "theoretical_loss": 3.5762635970689933, "tokens_seen": 1356070912 }, { "epoch": 0.41, "learning_rate": 0.0005949285828919917, "loss": 0.0689, "theoretical_loss": 3.576201959222396, "tokens_seen": 1356333056 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0003139876062050462, "objective/train/docs_used": 495730, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4432828426361084, "objective/train/original_loss": 1.443282961845398, "objective/train/theoretical_loss": 3.576140336622548, "objective/train/tokens_used": 1377055200, "objective/train/value_avg": -0.0080413818359375, "objective/train/value_loss": 0.0003055589913856238, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.8916015625, "objective/train/value_reward_corr": 0.8504052999798968, "objective/train/value_std": 0.0272674560546875, "objective/train/weight_avg": 1.0004558563232422, "objective/train/weighted_lm_loss": 1.4439336061477661, "objective/train/weights_max": 1.514553189277649, "objective/train/weights_min": 0.37005260586738586, "theoretical_loss": 3.576140336622548, "tokens_seen": 1356595200 }, { "epoch": 0.41, "learning_rate": 0.0005948483389504092, "loss": 0.0699, "theoretical_loss": 3.576140336622548, "tokens_seen": 1356595200 }, { "epoch": 0.41, "learning_rate": 0.0005947680950088269, "loss": 0.0727, "theoretical_loss": 3.5760787292627345, "tokens_seen": 1356857344 }, { "epoch": 0.41, "learning_rate": 0.0005946878510672444, "loss": 0.0728, "theoretical_loss": 3.576017137136242, "tokens_seen": 1357119488 }, { "epoch": 0.41, "learning_rate": 0.000594607607125662, "loss": 0.0709, "theoretical_loss": 3.5759555602363635, "tokens_seen": 1357381632 }, { "epoch": 0.41, "learning_rate": 0.0005945273631840797, "loss": 0.068, "theoretical_loss": 3.5758939985563942, "tokens_seen": 1357643776 }, { "epoch": 0.41, "learning_rate": 0.0005944471192424972, "loss": 0.0709, "theoretical_loss": 3.5758324520896347, "tokens_seen": 1357905920 }, { "epoch": 0.41, "learning_rate": 0.0005943668753009148, "loss": 0.0719, "theoretical_loss": 3.57577092082939, "tokens_seen": 1358168064 }, { "epoch": 0.41, "learning_rate": 0.0005942866313593324, "loss": 0.0709, "theoretical_loss": 3.5757094047689684, "tokens_seen": 1358430208 }, { "epoch": 0.41, "learning_rate": 0.00059420638741775, "loss": 0.0696, "theoretical_loss": 3.575647903901684, "tokens_seen": 1358692352 }, { "epoch": 0.41, "learning_rate": 0.0005941261434761675, "loss": 0.0713, "theoretical_loss": 3.575586418220853, "tokens_seen": 1358954496 }, { "epoch": 0.41, "learning_rate": 0.0005940458995345851, "loss": 0.0717, "theoretical_loss": 3.5755249477197983, "tokens_seen": 1359216640 }, { "epoch": 0.41, "learning_rate": 0.0005939656555930027, "loss": 0.0695, "theoretical_loss": 3.5754634923918447, "tokens_seen": 1359478784 }, { "epoch": 0.41, "learning_rate": 0.0005938854116514203, "loss": 0.0694, "theoretical_loss": 3.5754020522303227, "tokens_seen": 1359740928 }, { "epoch": 0.41, "learning_rate": 0.000593805167709838, "loss": 0.0697, "theoretical_loss": 3.575340627228566, "tokens_seen": 1360003072 }, { "epoch": 0.41, "learning_rate": 0.0005937249237682555, "loss": 0.0711, "theoretical_loss": 3.575279217379914, "tokens_seen": 1360265216 }, { "epoch": 0.41, "learning_rate": 0.0005936446798266731, "loss": 0.0737, "theoretical_loss": 3.575217822677709, "tokens_seen": 1360527360 }, { "epoch": 0.41, "learning_rate": 0.0005935644358850907, "loss": 0.0698, "theoretical_loss": 3.575156443115297, "tokens_seen": 1360789504 }, { "epoch": 0.41, "learning_rate": 0.0005934841919435082, "loss": 0.0699, "theoretical_loss": 3.5750950786860307, "tokens_seen": 1361051648 }, { "epoch": 0.41, "learning_rate": 0.0005934039480019259, "loss": 0.0703, "theoretical_loss": 3.5750337293832644, "tokens_seen": 1361313792 }, { "epoch": 0.41, "learning_rate": 0.0005933237040603434, "loss": 0.0682, "theoretical_loss": 3.5749723952003576, "tokens_seen": 1361575936 }, { "epoch": 0.41, "learning_rate": 0.000593243460118761, "loss": 0.0742, "theoretical_loss": 3.5749110761306744, "tokens_seen": 1361838080 }, { "epoch": 0.41, "learning_rate": 0.0005931632161771787, "loss": 0.0693, "theoretical_loss": 3.5748497721675823, "tokens_seen": 1362100224 }, { "epoch": 0.41, "learning_rate": 0.0005930829722355963, "loss": 0.0723, "theoretical_loss": 3.574788483304453, "tokens_seen": 1362362368 }, { "epoch": 0.41, "learning_rate": 0.0005930027282940138, "loss": 0.07, "theoretical_loss": 3.5747272095346636, "tokens_seen": 1362624512 }, { "epoch": 0.41, "learning_rate": 0.0005929224843524315, "loss": 0.0717, "theoretical_loss": 3.5746659508515943, "tokens_seen": 1362886656 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0005423167604021728, "objective/train/docs_used": 498147, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3880330324172974, "objective/train/original_loss": 1.3880329132080078, "objective/train/theoretical_loss": 3.5746047072486293, "objective/train/tokens_used": 1383608800, "objective/train/value_avg": -0.00732421875, "objective/train/value_loss": 0.0004709110071416944, "objective/train/value_max": -7.724761962890625e-05, "objective/train/value_min": -0.80078125, "objective/train/value_reward_corr": 0.6727173948210152, "objective/train/value_std": 0.0166168212890625, "objective/train/weight_avg": 1.0007424354553223, "objective/train/weighted_lm_loss": 1.3886373043060303, "objective/train/weights_max": 1.4593173265457153, "objective/train/weights_min": 0.3683730661869049, "theoretical_loss": 3.5746047072486293, "tokens_seen": 1363148800 }, { "epoch": 0.41, "learning_rate": 0.000592842240410849, "loss": 0.0711, "theoretical_loss": 3.5746047072486293, "tokens_seen": 1363148800 }, { "epoch": 0.41, "learning_rate": 0.0005927619964692665, "loss": 0.0691, "theoretical_loss": 3.574543478719158, "tokens_seen": 1363410944 }, { "epoch": 0.41, "learning_rate": 0.0005926817525276842, "loss": 0.07, "theoretical_loss": 3.5744822652565724, "tokens_seen": 1363673088 }, { "epoch": 0.41, "learning_rate": 0.0005926015085861017, "loss": 0.0727, "theoretical_loss": 3.5744210668542706, "tokens_seen": 1363935232 }, { "epoch": 0.41, "learning_rate": 0.0005925212646445193, "loss": 0.0721, "theoretical_loss": 3.574359883505653, "tokens_seen": 1364197376 }, { "epoch": 0.41, "learning_rate": 0.000592441020702937, "loss": 0.0725, "theoretical_loss": 3.5742987152041255, "tokens_seen": 1364459520 }, { "epoch": 0.41, "learning_rate": 0.0005923607767613546, "loss": 0.0688, "theoretical_loss": 3.574237561943098, "tokens_seen": 1364721664 }, { "epoch": 0.41, "learning_rate": 0.0005922805328197721, "loss": 0.0713, "theoretical_loss": 3.5741764237159837, "tokens_seen": 1364983808 }, { "epoch": 0.41, "learning_rate": 0.0005922002888781897, "loss": 0.0721, "theoretical_loss": 3.5741153005162003, "tokens_seen": 1365245952 }, { "epoch": 0.41, "learning_rate": 0.0005921200449366073, "loss": 0.0729, "theoretical_loss": 3.57405419233717, "tokens_seen": 1365508096 }, { "epoch": 0.41, "learning_rate": 0.0005920398009950249, "loss": 0.071, "theoretical_loss": 3.5739930991723194, "tokens_seen": 1365770240 }, { "epoch": 0.41, "learning_rate": 0.0005919595570534425, "loss": 0.0735, "theoretical_loss": 3.5739320210150787, "tokens_seen": 1366032384 }, { "epoch": 0.41, "learning_rate": 0.00059187931311186, "loss": 0.0714, "theoretical_loss": 3.5738709578588814, "tokens_seen": 1366294528 }, { "epoch": 0.41, "learning_rate": 0.0005917990691702777, "loss": 0.0687, "theoretical_loss": 3.573809909697167, "tokens_seen": 1366556672 }, { "epoch": 0.41, "learning_rate": 0.0005917188252286952, "loss": 0.07, "theoretical_loss": 3.573748876523379, "tokens_seen": 1366818816 }, { "epoch": 0.41, "learning_rate": 0.0005916385812871128, "loss": 0.0717, "theoretical_loss": 3.5736878583309624, "tokens_seen": 1367080960 }, { "epoch": 0.41, "learning_rate": 0.0005915583373455305, "loss": 0.0716, "theoretical_loss": 3.5736268551133694, "tokens_seen": 1367343104 }, { "epoch": 0.41, "learning_rate": 0.000591478093403948, "loss": 0.0725, "theoretical_loss": 3.5735658668640538, "tokens_seen": 1367605248 }, { "epoch": 0.41, "learning_rate": 0.0005913978494623656, "loss": 0.0698, "theoretical_loss": 3.573504893576476, "tokens_seen": 1367867392 }, { "epoch": 0.41, "learning_rate": 0.0005913176055207832, "loss": 0.0698, "theoretical_loss": 3.573443935244099, "tokens_seen": 1368129536 }, { "epoch": 0.41, "learning_rate": 0.0005912373615792008, "loss": 0.0702, "theoretical_loss": 3.5733829918603903, "tokens_seen": 1368391680 }, { "epoch": 0.41, "learning_rate": 0.0005911571176376183, "loss": 0.0728, "theoretical_loss": 3.573322063418821, "tokens_seen": 1368653824 }, { "epoch": 0.41, "learning_rate": 0.0005910768736960359, "loss": 0.0712, "theoretical_loss": 3.5732611499128666, "tokens_seen": 1368915968 }, { "epoch": 0.41, "learning_rate": 0.0005909966297544535, "loss": 0.07, "theoretical_loss": 3.5732002513360075, "tokens_seen": 1369178112 }, { "epoch": 0.41, "learning_rate": 0.0005909163858128712, "loss": 0.0698, "theoretical_loss": 3.5731393676817267, "tokens_seen": 1369440256 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0006691030575893819, "objective/train/docs_used": 500422, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.462990403175354, "objective/train/original_loss": 1.462990164756775, "objective/train/theoretical_loss": 3.573078498943513, "objective/train/tokens_used": 1390162400, "objective/train/value_avg": -0.01076507568359375, "objective/train/value_loss": 0.00031313643557950854, "objective/train/value_max": -6.812810897827148e-05, "objective/train/value_min": -0.68896484375, "objective/train/value_reward_corr": 0.7292926677177232, "objective/train/value_std": 0.0184326171875, "objective/train/weight_avg": 1.0008124113082886, "objective/train/weighted_lm_loss": 1.4639732837677002, "objective/train/weights_max": 1.8267512321472168, "objective/train/weights_min": 0.37275585532188416, "theoretical_loss": 3.573078498943513, "tokens_seen": 1369702400 }, { "epoch": 0.42, "learning_rate": 0.0005908361418712888, "loss": 0.0711, "theoretical_loss": 3.573078498943513, "tokens_seen": 1369702400 }, { "epoch": 0.42, "learning_rate": 0.0005907558979297063, "loss": 0.0716, "theoretical_loss": 3.5730176451148568, "tokens_seen": 1369964544 }, { "epoch": 0.42, "learning_rate": 0.000590675653988124, "loss": 0.0698, "theoretical_loss": 3.572956806189256, "tokens_seen": 1370226688 }, { "epoch": 0.42, "learning_rate": 0.0005905954100465415, "loss": 0.0707, "theoretical_loss": 3.5728959821602095, "tokens_seen": 1370488832 }, { "epoch": 0.42, "learning_rate": 0.000590515166104959, "loss": 0.0699, "theoretical_loss": 3.5728351730212218, "tokens_seen": 1370750976 }, { "epoch": 0.42, "learning_rate": 0.0005904349221633767, "loss": 0.0707, "theoretical_loss": 3.5727743787658017, "tokens_seen": 1371013120 }, { "epoch": 0.42, "learning_rate": 0.0005903546782217942, "loss": 0.0747, "theoretical_loss": 3.572713599387461, "tokens_seen": 1371275264 }, { "epoch": 0.42, "learning_rate": 0.0005902744342802118, "loss": 0.0695, "theoretical_loss": 3.572652834879716, "tokens_seen": 1371537408 }, { "epoch": 0.42, "learning_rate": 0.0005901941903386295, "loss": 0.0691, "theoretical_loss": 3.5725920852360877, "tokens_seen": 1371799552 }, { "epoch": 0.42, "learning_rate": 0.0005901139463970471, "loss": 0.0688, "theoretical_loss": 3.5725313504501006, "tokens_seen": 1372061696 }, { "epoch": 0.42, "learning_rate": 0.0005900337024554646, "loss": 0.0725, "theoretical_loss": 3.5724706305152827, "tokens_seen": 1372323840 }, { "epoch": 0.42, "learning_rate": 0.0005899534585138823, "loss": 0.0667, "theoretical_loss": 3.572409925425167, "tokens_seen": 1372585984 }, { "epoch": 0.42, "learning_rate": 0.0005898732145722998, "loss": 0.0709, "theoretical_loss": 3.5723492351732906, "tokens_seen": 1372848128 }, { "epoch": 0.42, "learning_rate": 0.0005897929706307173, "loss": 0.0702, "theoretical_loss": 3.572288559753194, "tokens_seen": 1373110272 }, { "epoch": 0.42, "learning_rate": 0.000589712726689135, "loss": 0.0747, "theoretical_loss": 3.5722278991584218, "tokens_seen": 1373372416 }, { "epoch": 0.42, "learning_rate": 0.0005896324827475525, "loss": 0.071, "theoretical_loss": 3.572167253382523, "tokens_seen": 1373634560 }, { "epoch": 0.42, "learning_rate": 0.0005895522388059702, "loss": 0.0739, "theoretical_loss": 3.5721066224190503, "tokens_seen": 1373896704 }, { "epoch": 0.42, "learning_rate": 0.0005894719948643878, "loss": 0.0702, "theoretical_loss": 3.572046006261561, "tokens_seen": 1374158848 }, { "epoch": 0.42, "learning_rate": 0.0005893917509228054, "loss": 0.0677, "theoretical_loss": 3.5719854049036153, "tokens_seen": 1374420992 }, { "epoch": 0.42, "learning_rate": 0.000589311506981223, "loss": 0.0692, "theoretical_loss": 3.571924818338779, "tokens_seen": 1374683136 }, { "epoch": 0.42, "learning_rate": 0.0005892312630396405, "loss": 0.0685, "theoretical_loss": 3.5718642465606214, "tokens_seen": 1374945280 }, { "epoch": 0.42, "learning_rate": 0.0005891510190980581, "loss": 0.0684, "theoretical_loss": 3.571803689562714, "tokens_seen": 1375207424 }, { "epoch": 0.42, "learning_rate": 0.0005890707751564757, "loss": 0.0722, "theoretical_loss": 3.571743147338635, "tokens_seen": 1375469568 }, { "epoch": 0.42, "learning_rate": 0.0005889905312148933, "loss": 0.0718, "theoretical_loss": 3.5716826198819653, "tokens_seen": 1375731712 }, { "epoch": 0.42, "learning_rate": 0.0005889102872733108, "loss": 0.0732, "theoretical_loss": 3.57162210718629, "tokens_seen": 1375993856 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.001140189473517239, "objective/train/docs_used": 502650, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.329345464706421, "objective/train/original_loss": 1.329345464706421, "objective/train/theoretical_loss": 3.5715616092451983, "objective/train/tokens_used": 1396716000, "objective/train/value_avg": -0.009979248046875, "objective/train/value_loss": 0.0002272567362524569, "objective/train/value_max": -6.35385513305664e-05, "objective/train/value_min": -0.2432861328125, "objective/train/value_reward_corr": 0.7523422487981606, "objective/train/value_std": 0.01526641845703125, "objective/train/weight_avg": 1.0012439489364624, "objective/train/weighted_lm_loss": 1.3301997184753418, "objective/train/weights_max": 1.2260957956314087, "objective/train/weights_min": 0.3730090260505676, "theoretical_loss": 3.5715616092451983, "tokens_seen": 1376256000 }, { "epoch": 0.42, "learning_rate": 0.0005888300433317285, "loss": 0.0693, "theoretical_loss": 3.5715616092451983, "tokens_seen": 1376256000 }, { "epoch": 0.42, "learning_rate": 0.000588749799390146, "loss": 0.0676, "theoretical_loss": 3.5715011260522824, "tokens_seen": 1376518144 }, { "epoch": 0.42, "learning_rate": 0.0005886695554485636, "loss": 0.0701, "theoretical_loss": 3.57144065760114, "tokens_seen": 1376780288 }, { "epoch": 0.42, "learning_rate": 0.0005885893115069813, "loss": 0.071, "theoretical_loss": 3.5713802038853726, "tokens_seen": 1377042432 }, { "epoch": 0.42, "learning_rate": 0.0005885090675653988, "loss": 0.0677, "theoretical_loss": 3.5713197648985844, "tokens_seen": 1377304576 }, { "epoch": 0.42, "learning_rate": 0.0005884288236238165, "loss": 0.0722, "theoretical_loss": 3.571259340634385, "tokens_seen": 1377566720 }, { "epoch": 0.42, "learning_rate": 0.000588348579682234, "loss": 0.0681, "theoretical_loss": 3.5711989310863874, "tokens_seen": 1377828864 }, { "epoch": 0.42, "learning_rate": 0.0005882683357406516, "loss": 0.0663, "theoretical_loss": 3.571138536248209, "tokens_seen": 1378091008 }, { "epoch": 0.42, "learning_rate": 0.0005881880917990692, "loss": 0.0715, "theoretical_loss": 3.57107815611347, "tokens_seen": 1378353152 }, { "epoch": 0.42, "learning_rate": 0.0005881078478574867, "loss": 0.0718, "theoretical_loss": 3.571017790675796, "tokens_seen": 1378615296 }, { "epoch": 0.42, "learning_rate": 0.0005880276039159043, "loss": 0.0712, "theoretical_loss": 3.570957439928815, "tokens_seen": 1378877440 }, { "epoch": 0.42, "learning_rate": 0.000587947359974322, "loss": 0.0708, "theoretical_loss": 3.5708971038661614, "tokens_seen": 1379139584 }, { "epoch": 0.42, "learning_rate": 0.0005878671160327396, "loss": 0.0693, "theoretical_loss": 3.5708367824814715, "tokens_seen": 1379401728 }, { "epoch": 0.42, "learning_rate": 0.0005877868720911571, "loss": 0.0706, "theoretical_loss": 3.570776475768386, "tokens_seen": 1379663872 }, { "epoch": 0.42, "learning_rate": 0.0005877066281495748, "loss": 0.0715, "theoretical_loss": 3.57071618372055, "tokens_seen": 1379926016 }, { "epoch": 0.42, "learning_rate": 0.0005876263842079923, "loss": 0.0719, "theoretical_loss": 3.570655906331612, "tokens_seen": 1380188160 }, { "epoch": 0.42, "learning_rate": 0.0005875461402664098, "loss": 0.0708, "theoretical_loss": 3.570595643595225, "tokens_seen": 1380450304 }, { "epoch": 0.42, "learning_rate": 0.0005874658963248275, "loss": 0.0709, "theoretical_loss": 3.570535395505045, "tokens_seen": 1380712448 }, { "epoch": 0.42, "learning_rate": 0.000587385652383245, "loss": 0.0682, "theoretical_loss": 3.570475162054734, "tokens_seen": 1380974592 }, { "epoch": 0.42, "learning_rate": 0.0005873054084416626, "loss": 0.0705, "theoretical_loss": 3.570414943237956, "tokens_seen": 1381236736 }, { "epoch": 0.42, "learning_rate": 0.0005872251645000803, "loss": 0.0705, "theoretical_loss": 3.570354739048379, "tokens_seen": 1381498880 }, { "epoch": 0.42, "learning_rate": 0.0005871449205584979, "loss": 0.0694, "theoretical_loss": 3.5702945494796765, "tokens_seen": 1381761024 }, { "epoch": 0.42, "learning_rate": 0.0005870646766169155, "loss": 0.0708, "theoretical_loss": 3.5702343745255236, "tokens_seen": 1382023168 }, { "epoch": 0.42, "learning_rate": 0.0005869844326753331, "loss": 0.0707, "theoretical_loss": 3.5701742141796022, "tokens_seen": 1382285312 }, { "epoch": 0.42, "learning_rate": 0.0005869041887337506, "loss": 0.0684, "theoretical_loss": 3.570114068435595, "tokens_seen": 1382547456 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.0004002986242994666, "objective/train/docs_used": 505055, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4260497093200684, "objective/train/original_loss": 1.4260497093200684, "objective/train/theoretical_loss": 3.570053937287192, "objective/train/tokens_used": 1403269600, "objective/train/value_avg": -0.01218414306640625, "objective/train/value_loss": 0.00020151071657892317, "objective/train/value_max": -9.244680404663086e-05, "objective/train/value_min": -0.576171875, "objective/train/value_reward_corr": 0.8434513792631848, "objective/train/value_std": 0.022979736328125, "objective/train/weight_avg": 1.0004942417144775, "objective/train/weighted_lm_loss": 1.4266375303268433, "objective/train/weights_max": 1.1901416778564453, "objective/train/weights_min": 0.3686957061290741, "theoretical_loss": 3.570053937287192, "tokens_seen": 1382809600 }, { "epoch": 0.42, "learning_rate": 0.0005868239447921682, "loss": 0.0716, "theoretical_loss": 3.570053937287192, "tokens_seen": 1382809600 }, { "epoch": 0.42, "learning_rate": 0.0005867437008505858, "loss": 0.07, "theoretical_loss": 3.569993820728084, "tokens_seen": 1383071744 }, { "epoch": 0.42, "learning_rate": 0.0005866634569090033, "loss": 0.0686, "theoretical_loss": 3.569933718751967, "tokens_seen": 1383333888 }, { "epoch": 0.42, "learning_rate": 0.000586583212967421, "loss": 0.0708, "theoretical_loss": 3.569873631352542, "tokens_seen": 1383596032 }, { "epoch": 0.42, "learning_rate": 0.0005865029690258386, "loss": 0.0705, "theoretical_loss": 3.5698135585235122, "tokens_seen": 1383858176 }, { "epoch": 0.42, "learning_rate": 0.0005864227250842562, "loss": 0.0684, "theoretical_loss": 3.5697535002585856, "tokens_seen": 1384120320 }, { "epoch": 0.42, "learning_rate": 0.0005863424811426738, "loss": 0.0703, "theoretical_loss": 3.569693456551474, "tokens_seen": 1384382464 }, { "epoch": 0.42, "learning_rate": 0.0005862622372010913, "loss": 0.0721, "theoretical_loss": 3.5696334273958925, "tokens_seen": 1384644608 }, { "epoch": 0.42, "learning_rate": 0.0005861819932595089, "loss": 0.0702, "theoretical_loss": 3.569573412785561, "tokens_seen": 1384906752 }, { "epoch": 0.42, "learning_rate": 0.0005861017493179265, "loss": 0.0719, "theoretical_loss": 3.569513412714203, "tokens_seen": 1385168896 }, { "epoch": 0.42, "learning_rate": 0.0005860215053763441, "loss": 0.073, "theoretical_loss": 3.569453427175546, "tokens_seen": 1385431040 }, { "epoch": 0.42, "learning_rate": 0.0005859412614347616, "loss": 0.0689, "theoretical_loss": 3.5693934561633203, "tokens_seen": 1385693184 }, { "epoch": 0.42, "learning_rate": 0.0005858610174931793, "loss": 0.0692, "theoretical_loss": 3.5693334996712625, "tokens_seen": 1385955328 }, { "epoch": 0.42, "learning_rate": 0.0005857807735515968, "loss": 0.0708, "theoretical_loss": 3.5692735576931103, "tokens_seen": 1386217472 }, { "epoch": 0.42, "learning_rate": 0.0005857005296100145, "loss": 0.0675, "theoretical_loss": 3.569213630222607, "tokens_seen": 1386479616 }, { "epoch": 0.42, "learning_rate": 0.0005856202856684321, "loss": 0.07, "theoretical_loss": 3.5691537172535, "tokens_seen": 1386741760 }, { "epoch": 0.42, "learning_rate": 0.0005855400417268496, "loss": 0.0676, "theoretical_loss": 3.569093818779539, "tokens_seen": 1387003904 }, { "epoch": 0.42, "learning_rate": 0.0005854597977852673, "loss": 0.0737, "theoretical_loss": 3.5690339347944784, "tokens_seen": 1387266048 }, { "epoch": 0.42, "learning_rate": 0.0005853795538436848, "loss": 0.0684, "theoretical_loss": 3.568974065292077, "tokens_seen": 1387528192 }, { "epoch": 0.42, "learning_rate": 0.0005852993099021024, "loss": 0.0723, "theoretical_loss": 3.5689142102660973, "tokens_seen": 1387790336 }, { "epoch": 0.42, "learning_rate": 0.00058521906596052, "loss": 0.0679, "theoretical_loss": 3.568854369710305, "tokens_seen": 1388052480 }, { "epoch": 0.42, "learning_rate": 0.0005851388220189375, "loss": 0.067, "theoretical_loss": 3.5687945436184703, "tokens_seen": 1388314624 }, { "epoch": 0.42, "learning_rate": 0.0005850585780773551, "loss": 0.0702, "theoretical_loss": 3.5687347319843665, "tokens_seen": 1388576768 }, { "epoch": 0.42, "learning_rate": 0.0005849783341357728, "loss": 0.0707, "theoretical_loss": 3.5686749348017726, "tokens_seen": 1388838912 }, { "epoch": 0.42, "learning_rate": 0.0005848980901941904, "loss": 0.0717, "theoretical_loss": 3.5686151520644684, "tokens_seen": 1389101056 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.0012033990351483226, "objective/train/docs_used": 507487, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3310184478759766, "objective/train/original_loss": 1.3310184478759766, "objective/train/theoretical_loss": 3.56855538376624, "objective/train/tokens_used": 1409823200, "objective/train/value_avg": -0.006999969482421875, "objective/train/value_loss": 9.323181438958272e-05, "objective/train/value_max": -7.599592208862305e-05, "objective/train/value_min": -0.252685546875, "objective/train/value_reward_corr": 0.81285075839567, "objective/train/value_std": 0.01432037353515625, "objective/train/weight_avg": 1.0012476444244385, "objective/train/weighted_lm_loss": 1.3323184251785278, "objective/train/weights_max": 1.1576061248779297, "objective/train/weights_min": 0.3713989555835724, "theoretical_loss": 3.56855538376624, "tokens_seen": 1389363200 }, { "epoch": 0.42, "learning_rate": 0.0005848178462526079, "loss": 0.0704, "theoretical_loss": 3.56855538376624, "tokens_seen": 1389363200 }, { "epoch": 0.42, "learning_rate": 0.0005847376023110256, "loss": 0.0683, "theoretical_loss": 3.568495629900877, "tokens_seen": 1389625344 }, { "epoch": 0.42, "learning_rate": 0.0005846573583694431, "loss": 0.0729, "theoretical_loss": 3.5684358904621725, "tokens_seen": 1389887488 }, { "epoch": 0.42, "learning_rate": 0.0005845771144278606, "loss": 0.0675, "theoretical_loss": 3.5683761654439223, "tokens_seen": 1390149632 }, { "epoch": 0.42, "learning_rate": 0.0005844968704862783, "loss": 0.067, "theoretical_loss": 3.5683164548399287, "tokens_seen": 1390411776 }, { "epoch": 0.42, "learning_rate": 0.0005844166265446958, "loss": 0.0721, "theoretical_loss": 3.568256758643995, "tokens_seen": 1390673920 }, { "epoch": 0.42, "learning_rate": 0.0005843363826031135, "loss": 0.0669, "theoretical_loss": 3.5681970768499305, "tokens_seen": 1390936064 }, { "epoch": 0.42, "learning_rate": 0.000584256138661531, "loss": 0.0683, "theoretical_loss": 3.5681374094515466, "tokens_seen": 1391198208 }, { "epoch": 0.42, "learning_rate": 0.0005841758947199487, "loss": 0.0698, "theoretical_loss": 3.5680777564426602, "tokens_seen": 1391460352 }, { "epoch": 0.42, "learning_rate": 0.0005840956507783663, "loss": 0.0667, "theoretical_loss": 3.56801811781709, "tokens_seen": 1391722496 }, { "epoch": 0.42, "learning_rate": 0.0005840154068367838, "loss": 0.0687, "theoretical_loss": 3.5679584935686615, "tokens_seen": 1391984640 }, { "epoch": 0.42, "learning_rate": 0.0005839351628952014, "loss": 0.068, "theoretical_loss": 3.5678988836912007, "tokens_seen": 1392246784 }, { "epoch": 0.42, "learning_rate": 0.000583854918953619, "loss": 0.0691, "theoretical_loss": 3.567839288178539, "tokens_seen": 1392508928 }, { "epoch": 0.42, "learning_rate": 0.0005837746750120366, "loss": 0.0703, "theoretical_loss": 3.5677797070245125, "tokens_seen": 1392771072 }, { "epoch": 0.42, "learning_rate": 0.0005836944310704541, "loss": 0.0688, "theoretical_loss": 3.567720140222959, "tokens_seen": 1393033216 }, { "epoch": 0.42, "learning_rate": 0.0005836141871288718, "loss": 0.0713, "theoretical_loss": 3.567660587767722, "tokens_seen": 1393295360 }, { "epoch": 0.42, "learning_rate": 0.0005835339431872894, "loss": 0.0693, "theoretical_loss": 3.567601049652648, "tokens_seen": 1393557504 }, { "epoch": 0.42, "learning_rate": 0.000583453699245707, "loss": 0.0689, "theoretical_loss": 3.567541525871587, "tokens_seen": 1393819648 }, { "epoch": 0.42, "learning_rate": 0.0005833734553041246, "loss": 0.0691, "theoretical_loss": 3.5674820164183934, "tokens_seen": 1394081792 }, { "epoch": 0.42, "learning_rate": 0.0005832932113625421, "loss": 0.0717, "theoretical_loss": 3.567422521286925, "tokens_seen": 1394343936 }, { "epoch": 0.42, "learning_rate": 0.0005832129674209598, "loss": 0.071, "theoretical_loss": 3.5673630404710432, "tokens_seen": 1394606080 }, { "epoch": 0.42, "learning_rate": 0.0005831327234793773, "loss": 0.0704, "theoretical_loss": 3.567303573964614, "tokens_seen": 1394868224 }, { "epoch": 0.42, "learning_rate": 0.0005830524795377949, "loss": 0.0704, "theoretical_loss": 3.5672441217615063, "tokens_seen": 1395130368 }, { "epoch": 0.42, "learning_rate": 0.0005829722355962125, "loss": 0.0732, "theoretical_loss": 3.5671846838555936, "tokens_seen": 1395392512 }, { "epoch": 0.42, "learning_rate": 0.0005828919916546301, "loss": 0.0706, "theoretical_loss": 3.567125260240752, "tokens_seen": 1395654656 }, { "epoch": 0.42, "objective/train/advantage_avg": -0.0006299561937339604, "objective/train/docs_used": 509897, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3458755016326904, "objective/train/original_loss": 1.3458755016326904, "objective/train/theoretical_loss": 3.5670658509108626, "objective/train/tokens_used": 1416376800, "objective/train/value_avg": -0.006847381591796875, "objective/train/value_loss": 0.0002548126794863492, "objective/train/value_max": -9.763240814208984e-05, "objective/train/value_min": -0.53857421875, "objective/train/value_reward_corr": 0.7935790469163092, "objective/train/value_std": 0.016387939453125, "objective/train/weight_avg": 0.9994886517524719, "objective/train/weighted_lm_loss": 1.345136046409607, "objective/train/weights_max": 1.3395164012908936, "objective/train/weights_min": 0.3683066666126251, "theoretical_loss": 3.5670658509108626, "tokens_seen": 1395916800 }, { "epoch": 0.42, "learning_rate": 0.0005828117477130476, "loss": 0.0683, "theoretical_loss": 3.5670658509108626, "tokens_seen": 1395916800 }, { "epoch": 0.42, "learning_rate": 0.0005827315037714653, "loss": 0.0726, "theoretical_loss": 3.5670064558598096, "tokens_seen": 1396178944 }, { "epoch": 0.42, "learning_rate": 0.0005826512598298829, "loss": 0.0703, "theoretical_loss": 3.5669470750814813, "tokens_seen": 1396441088 }, { "epoch": 0.42, "learning_rate": 0.0005825710158883004, "loss": 0.0712, "theoretical_loss": 3.5668877085697694, "tokens_seen": 1396703232 }, { "epoch": 0.42, "learning_rate": 0.0005824907719467181, "loss": 0.073, "theoretical_loss": 3.5668283563185703, "tokens_seen": 1396965376 }, { "epoch": 0.42, "learning_rate": 0.0005824105280051356, "loss": 0.0695, "theoretical_loss": 3.566769018321782, "tokens_seen": 1397227520 }, { "epoch": 0.42, "learning_rate": 0.0005823302840635532, "loss": 0.0731, "theoretical_loss": 3.5667096945733086, "tokens_seen": 1397489664 }, { "epoch": 0.42, "learning_rate": 0.0005822500401219708, "loss": 0.0695, "theoretical_loss": 3.566650385067057, "tokens_seen": 1397751808 }, { "epoch": 0.42, "learning_rate": 0.0005821697961803883, "loss": 0.0699, "theoretical_loss": 3.5665910897969377, "tokens_seen": 1398013952 }, { "epoch": 0.42, "learning_rate": 0.0005820895522388059, "loss": 0.0724, "theoretical_loss": 3.5665318087568645, "tokens_seen": 1398276096 }, { "epoch": 0.42, "learning_rate": 0.0005820093082972236, "loss": 0.071, "theoretical_loss": 3.5664725419407564, "tokens_seen": 1398538240 }, { "epoch": 0.42, "learning_rate": 0.0005819290643556412, "loss": 0.0704, "theoretical_loss": 3.566413289342535, "tokens_seen": 1398800384 }, { "epoch": 0.42, "learning_rate": 0.0005818488204140588, "loss": 0.0702, "theoretical_loss": 3.566354050956126, "tokens_seen": 1399062528 }, { "epoch": 0.42, "learning_rate": 0.0005817685764724764, "loss": 0.0721, "theoretical_loss": 3.566294826775459, "tokens_seen": 1399324672 }, { "epoch": 0.42, "learning_rate": 0.0005816883325308939, "loss": 0.0683, "theoretical_loss": 3.566235616794466, "tokens_seen": 1399586816 }, { "epoch": 0.42, "learning_rate": 0.0005816080885893115, "loss": 0.0697, "theoretical_loss": 3.566176421007085, "tokens_seen": 1399848960 }, { "epoch": 0.42, "learning_rate": 0.0005815278446477291, "loss": 0.0691, "theoretical_loss": 3.566117239407256, "tokens_seen": 1400111104 }, { "epoch": 0.42, "learning_rate": 0.0005814476007061466, "loss": 0.0718, "theoretical_loss": 3.5660580719889237, "tokens_seen": 1400373248 }, { "epoch": 0.42, "learning_rate": 0.0005813673567645643, "loss": 0.0727, "theoretical_loss": 3.5659989187460353, "tokens_seen": 1400635392 }, { "epoch": 0.42, "learning_rate": 0.0005812871128229819, "loss": 0.0741, "theoretical_loss": 3.5659397796725427, "tokens_seen": 1400897536 }, { "epoch": 0.42, "learning_rate": 0.0005812068688813995, "loss": 0.067, "theoretical_loss": 3.565880654762402, "tokens_seen": 1401159680 }, { "epoch": 0.42, "learning_rate": 0.0005811266249398171, "loss": 0.0684, "theoretical_loss": 3.5658215440095717, "tokens_seen": 1401421824 }, { "epoch": 0.42, "learning_rate": 0.0005810463809982346, "loss": 0.0698, "theoretical_loss": 3.565762447408015, "tokens_seen": 1401683968 }, { "epoch": 0.42, "learning_rate": 0.0005809661370566522, "loss": 0.0722, "theoretical_loss": 3.5657033649516974, "tokens_seen": 1401946112 }, { "epoch": 0.42, "learning_rate": 0.0005808858931150698, "loss": 0.0693, "theoretical_loss": 3.5656442966345905, "tokens_seen": 1402208256 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.00029971322510391474, "objective/train/docs_used": 512300, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5333430767059326, "objective/train/original_loss": 1.5333430767059326, "objective/train/theoretical_loss": 3.565585242450667, "objective/train/tokens_used": 1422930400, "objective/train/value_avg": -0.00736236572265625, "objective/train/value_loss": 0.00021842159912921488, "objective/train/value_max": -7.033348083496094e-05, "objective/train/value_min": -0.68017578125, "objective/train/value_reward_corr": 0.717017729068119, "objective/train/value_std": 0.01406097412109375, "objective/train/weight_avg": 1.0004018545150757, "objective/train/weighted_lm_loss": 1.5342278480529785, "objective/train/weights_max": 1.4047281742095947, "objective/train/weights_min": 0.3692037761211395, "theoretical_loss": 3.565585242450667, "tokens_seen": 1402470400 }, { "epoch": 0.43, "learning_rate": 0.0005808056491734874, "loss": 0.0744, "theoretical_loss": 3.565585242450667, "tokens_seen": 1402470400 }, { "epoch": 0.43, "learning_rate": 0.000580725405231905, "loss": 0.0715, "theoretical_loss": 3.5655262023939054, "tokens_seen": 1402732544 }, { "epoch": 0.43, "learning_rate": 0.0005806451612903226, "loss": 0.0688, "theoretical_loss": 3.5654671764582866, "tokens_seen": 1402994688 }, { "epoch": 0.43, "learning_rate": 0.0005805649173487401, "loss": 0.0703, "theoretical_loss": 3.5654081646377955, "tokens_seen": 1403256832 }, { "epoch": 0.43, "learning_rate": 0.0005804846734071579, "loss": 0.0705, "theoretical_loss": 3.5653491669264215, "tokens_seen": 1403518976 }, { "epoch": 0.43, "learning_rate": 0.0005804044294655754, "loss": 0.0709, "theoretical_loss": 3.565290183318156, "tokens_seen": 1403781120 }, { "epoch": 0.43, "learning_rate": 0.0005803241855239929, "loss": 0.0706, "theoretical_loss": 3.565231213806995, "tokens_seen": 1404043264 }, { "epoch": 0.43, "learning_rate": 0.0005802439415824106, "loss": 0.0692, "theoretical_loss": 3.5651722583869394, "tokens_seen": 1404305408 }, { "epoch": 0.43, "learning_rate": 0.0005801636976408281, "loss": 0.0723, "theoretical_loss": 3.565113317051991, "tokens_seen": 1404567552 }, { "epoch": 0.43, "learning_rate": 0.0005800834536992457, "loss": 0.07, "theoretical_loss": 3.5650543897961584, "tokens_seen": 1404829696 }, { "epoch": 0.43, "learning_rate": 0.0005800032097576633, "loss": 0.0722, "theoretical_loss": 3.5649954766134515, "tokens_seen": 1405091840 }, { "epoch": 0.43, "learning_rate": 0.0005799229658160809, "loss": 0.0725, "theoretical_loss": 3.5649365774978845, "tokens_seen": 1405353984 }, { "epoch": 0.43, "learning_rate": 0.0005798427218744984, "loss": 0.0698, "theoretical_loss": 3.5648776924434755, "tokens_seen": 1405616128 }, { "epoch": 0.43, "learning_rate": 0.0005797624779329161, "loss": 0.0725, "theoretical_loss": 3.5648188214442467, "tokens_seen": 1405878272 }, { "epoch": 0.43, "learning_rate": 0.0005796822339913337, "loss": 0.0734, "theoretical_loss": 3.5647599644942227, "tokens_seen": 1406140416 }, { "epoch": 0.43, "learning_rate": 0.0005796019900497512, "loss": 0.0713, "theoretical_loss": 3.564701121587434, "tokens_seen": 1406402560 }, { "epoch": 0.43, "learning_rate": 0.0005795217461081689, "loss": 0.067, "theoretical_loss": 3.5646422927179113, "tokens_seen": 1406664704 }, { "epoch": 0.43, "learning_rate": 0.0005794415021665864, "loss": 0.0693, "theoretical_loss": 3.564583477879692, "tokens_seen": 1406926848 }, { "epoch": 0.43, "learning_rate": 0.0005793612582250041, "loss": 0.0707, "theoretical_loss": 3.5645246770668164, "tokens_seen": 1407188992 }, { "epoch": 0.43, "learning_rate": 0.0005792810142834216, "loss": 0.0717, "theoretical_loss": 3.5644658902733273, "tokens_seen": 1407451136 }, { "epoch": 0.43, "learning_rate": 0.0005792007703418391, "loss": 0.0714, "theoretical_loss": 3.564407117493272, "tokens_seen": 1407713280 }, { "epoch": 0.43, "learning_rate": 0.0005791205264002569, "loss": 0.0696, "theoretical_loss": 3.564348358720702, "tokens_seen": 1407975424 }, { "epoch": 0.43, "learning_rate": 0.0005790402824586744, "loss": 0.0734, "theoretical_loss": 3.564289613949671, "tokens_seen": 1408237568 }, { "epoch": 0.43, "learning_rate": 0.000578960038517092, "loss": 0.0715, "theoretical_loss": 3.5642308831742384, "tokens_seen": 1408499712 }, { "epoch": 0.43, "learning_rate": 0.0005788797945755096, "loss": 0.0707, "theoretical_loss": 3.564172166388465, "tokens_seen": 1408761856 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.0017313800053671002, "objective/train/docs_used": 514827, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5287766456604004, "objective/train/original_loss": 1.5287766456604004, "objective/train/theoretical_loss": 3.5641134635864153, "objective/train/tokens_used": 1429484000, "objective/train/value_avg": -0.0082550048828125, "objective/train/value_loss": 0.00014103070134297013, "objective/train/value_max": -6.157159805297852e-05, "objective/train/value_min": -0.66455078125, "objective/train/value_reward_corr": 0.7031854825152273, "objective/train/value_std": 0.01306915283203125, "objective/train/weight_avg": 1.0018001794815063, "objective/train/weighted_lm_loss": 1.531276822090149, "objective/train/weights_max": 1.2216413021087646, "objective/train/weights_min": 0.5578534603118896, "theoretical_loss": 3.5641134635864153, "tokens_seen": 1409024000 }, { "epoch": 0.43, "learning_rate": 0.0005787995506339272, "loss": 0.0709, "theoretical_loss": 3.5641134635864153, "tokens_seen": 1409024000 }, { "epoch": 0.43, "learning_rate": 0.0005787193066923447, "loss": 0.0692, "theoretical_loss": 3.56405477476216, "tokens_seen": 1409286144 }, { "epoch": 0.43, "learning_rate": 0.0005786390627507623, "loss": 0.0711, "theoretical_loss": 3.563996099909771, "tokens_seen": 1409548288 }, { "epoch": 0.43, "learning_rate": 0.0005785588188091799, "loss": 0.0728, "theoretical_loss": 3.5639374390233245, "tokens_seen": 1409810432 }, { "epoch": 0.43, "learning_rate": 0.0005784785748675974, "loss": 0.0705, "theoretical_loss": 3.563878792096901, "tokens_seen": 1410072576 }, { "epoch": 0.43, "learning_rate": 0.0005783983309260151, "loss": 0.0707, "theoretical_loss": 3.5638201591245826, "tokens_seen": 1410334720 }, { "epoch": 0.43, "learning_rate": 0.0005783180869844327, "loss": 0.0696, "theoretical_loss": 3.563761540100457, "tokens_seen": 1410596864 }, { "epoch": 0.43, "learning_rate": 0.0005782378430428504, "loss": 0.0723, "theoretical_loss": 3.5637029350186156, "tokens_seen": 1410859008 }, { "epoch": 0.43, "learning_rate": 0.0005781575991012679, "loss": 0.0715, "theoretical_loss": 3.563644343873152, "tokens_seen": 1411121152 }, { "epoch": 0.43, "learning_rate": 0.0005780773551596854, "loss": 0.0741, "theoretical_loss": 3.5635857666581643, "tokens_seen": 1411383296 }, { "epoch": 0.43, "learning_rate": 0.0005779971112181031, "loss": 0.0722, "theoretical_loss": 3.5635272033677534, "tokens_seen": 1411645440 }, { "epoch": 0.43, "learning_rate": 0.0005779168672765206, "loss": 0.0734, "theoretical_loss": 3.5634686539960247, "tokens_seen": 1411907584 }, { "epoch": 0.43, "learning_rate": 0.0005778366233349382, "loss": 0.0723, "theoretical_loss": 3.5634101185370874, "tokens_seen": 1412169728 }, { "epoch": 0.43, "learning_rate": 0.0005777563793933558, "loss": 0.0709, "theoretical_loss": 3.5633515969850533, "tokens_seen": 1412431872 }, { "epoch": 0.43, "learning_rate": 0.0005776761354517734, "loss": 0.0711, "theoretical_loss": 3.5632930893340378, "tokens_seen": 1412694016 }, { "epoch": 0.43, "learning_rate": 0.000577595891510191, "loss": 0.0722, "theoretical_loss": 3.5632345955781606, "tokens_seen": 1412956160 }, { "epoch": 0.43, "learning_rate": 0.0005775156475686087, "loss": 0.0697, "theoretical_loss": 3.5631761157115456, "tokens_seen": 1413218304 }, { "epoch": 0.43, "learning_rate": 0.0005774354036270262, "loss": 0.0694, "theoretical_loss": 3.5631176497283175, "tokens_seen": 1413480448 }, { "epoch": 0.43, "learning_rate": 0.0005773551596854437, "loss": 0.0722, "theoretical_loss": 3.563059197622608, "tokens_seen": 1413742592 }, { "epoch": 0.43, "learning_rate": 0.0005772749157438614, "loss": 0.0722, "theoretical_loss": 3.56300075938855, "tokens_seen": 1414004736 }, { "epoch": 0.43, "learning_rate": 0.0005771946718022789, "loss": 0.072, "theoretical_loss": 3.5629423350202813, "tokens_seen": 1414266880 }, { "epoch": 0.43, "learning_rate": 0.0005771144278606965, "loss": 0.0708, "theoretical_loss": 3.5628839245119424, "tokens_seen": 1414529024 }, { "epoch": 0.43, "learning_rate": 0.0005770341839191141, "loss": 0.0734, "theoretical_loss": 3.5628255278576777, "tokens_seen": 1414791168 }, { "epoch": 0.43, "learning_rate": 0.0005769539399775317, "loss": 0.0721, "theoretical_loss": 3.5627671450516347, "tokens_seen": 1415053312 }, { "epoch": 0.43, "learning_rate": 0.0005768736960359494, "loss": 0.0721, "theoretical_loss": 3.5627087760879657, "tokens_seen": 1415315456 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.000985055579803884, "objective/train/docs_used": 517252, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4529213905334473, "objective/train/original_loss": 1.4529212713241577, "objective/train/theoretical_loss": 3.5626504209608254, "objective/train/tokens_used": 1436037600, "objective/train/value_avg": -0.006443023681640625, "objective/train/value_loss": 0.00012476177653297782, "objective/train/value_max": -7.033348083496094e-05, "objective/train/value_min": -0.61279296875, "objective/train/value_reward_corr": 0.6847537745656294, "objective/train/value_std": 0.01068115234375, "objective/train/weight_avg": 1.0010443925857544, "objective/train/weighted_lm_loss": 1.4547419548034668, "objective/train/weights_max": 1.409537672996521, "objective/train/weights_min": 0.3798121213912964, "theoretical_loss": 3.5626504209608254, "tokens_seen": 1415577600 }, { "epoch": 0.43, "learning_rate": 0.0005767934520943669, "loss": 0.0717, "theoretical_loss": 3.5626504209608254, "tokens_seen": 1415577600 }, { "epoch": 0.43, "learning_rate": 0.0005767132081527845, "loss": 0.0676, "theoretical_loss": 3.5625920796643724, "tokens_seen": 1415839744 }, { "epoch": 0.43, "learning_rate": 0.0005766329642112021, "loss": 0.074, "theoretical_loss": 3.5625337521927687, "tokens_seen": 1416101888 }, { "epoch": 0.43, "learning_rate": 0.0005765527202696197, "loss": 0.0724, "theoretical_loss": 3.5624754385401802, "tokens_seen": 1416364032 }, { "epoch": 0.43, "learning_rate": 0.0005764724763280372, "loss": 0.0704, "theoretical_loss": 3.562417138700776, "tokens_seen": 1416626176 }, { "epoch": 0.43, "learning_rate": 0.0005763922323864549, "loss": 0.0727, "theoretical_loss": 3.5623588526687295, "tokens_seen": 1416888320 }, { "epoch": 0.43, "learning_rate": 0.0005763119884448724, "loss": 0.0682, "theoretical_loss": 3.562300580438216, "tokens_seen": 1417150464 }, { "epoch": 0.43, "learning_rate": 0.0005762317445032899, "loss": 0.073, "theoretical_loss": 3.562242322003416, "tokens_seen": 1417412608 }, { "epoch": 0.43, "learning_rate": 0.0005761515005617077, "loss": 0.0713, "theoretical_loss": 3.5621840773585127, "tokens_seen": 1417674752 }, { "epoch": 0.43, "learning_rate": 0.0005760712566201252, "loss": 0.0694, "theoretical_loss": 3.5621258464976924, "tokens_seen": 1417936896 }, { "epoch": 0.43, "learning_rate": 0.0005759910126785428, "loss": 0.0689, "theoretical_loss": 3.5620676294151465, "tokens_seen": 1418199040 }, { "epoch": 0.43, "learning_rate": 0.0005759107687369604, "loss": 0.0696, "theoretical_loss": 3.562009426105069, "tokens_seen": 1418461184 }, { "epoch": 0.43, "learning_rate": 0.000575830524795378, "loss": 0.0714, "theoretical_loss": 3.561951236561656, "tokens_seen": 1418723328 }, { "epoch": 0.43, "learning_rate": 0.0005757502808537955, "loss": 0.0727, "theoretical_loss": 3.56189306077911, "tokens_seen": 1418985472 }, { "epoch": 0.43, "learning_rate": 0.0005756700369122131, "loss": 0.072, "theoretical_loss": 3.561834898751635, "tokens_seen": 1419247616 }, { "epoch": 0.43, "learning_rate": 0.0005755897929706307, "loss": 0.0709, "theoretical_loss": 3.561776750473439, "tokens_seen": 1419509760 }, { "epoch": 0.43, "learning_rate": 0.0005755095490290483, "loss": 0.0729, "theoretical_loss": 3.561718615938733, "tokens_seen": 1419771904 }, { "epoch": 0.43, "learning_rate": 0.000575429305087466, "loss": 0.0705, "theoretical_loss": 3.5616604951417328, "tokens_seen": 1420034048 }, { "epoch": 0.43, "learning_rate": 0.0005753490611458835, "loss": 0.0707, "theoretical_loss": 3.561602388076656, "tokens_seen": 1420296192 }, { "epoch": 0.43, "learning_rate": 0.0005752688172043012, "loss": 0.0697, "theoretical_loss": 3.5615442947377254, "tokens_seen": 1420558336 }, { "epoch": 0.43, "learning_rate": 0.0005751885732627187, "loss": 0.0722, "theoretical_loss": 3.5614862151191664, "tokens_seen": 1420820480 }, { "epoch": 0.43, "learning_rate": 0.0005751083293211362, "loss": 0.0719, "theoretical_loss": 3.561428149215208, "tokens_seen": 1421082624 }, { "epoch": 0.43, "learning_rate": 0.0005750280853795539, "loss": 0.0695, "theoretical_loss": 3.561370097020083, "tokens_seen": 1421344768 }, { "epoch": 0.43, "learning_rate": 0.0005749478414379714, "loss": 0.0712, "theoretical_loss": 3.561312058528026, "tokens_seen": 1421606912 }, { "epoch": 0.43, "learning_rate": 0.000574867597496389, "loss": 0.0708, "theoretical_loss": 3.5612540337332783, "tokens_seen": 1421869056 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.0007025928935036063, "objective/train/docs_used": 519513, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4979246854782104, "objective/train/original_loss": 1.497924566268921, "objective/train/theoretical_loss": 3.5611960226300816, "objective/train/tokens_used": 1442591200, "objective/train/value_avg": -0.007648468017578125, "objective/train/value_loss": 0.00016468969988636672, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.35791015625, "objective/train/value_reward_corr": 0.7218915626636218, "objective/train/value_std": 0.01317596435546875, "objective/train/weight_avg": 1.0007803440093994, "objective/train/weighted_lm_loss": 1.4997540712356567, "objective/train/weights_max": 1.2718398571014404, "objective/train/weights_min": 0.3820907473564148, "theoretical_loss": 3.5611960226300816, "tokens_seen": 1422131200 }, { "epoch": 0.43, "learning_rate": 0.0005747873535548066, "loss": 0.0722, "theoretical_loss": 3.5611960226300816, "tokens_seen": 1422131200 }, { "epoch": 0.43, "learning_rate": 0.0005747071096132242, "loss": 0.0712, "theoretical_loss": 3.561138025212683, "tokens_seen": 1422393344 }, { "epoch": 0.43, "learning_rate": 0.0005746268656716417, "loss": 0.07, "theoretical_loss": 3.561080041475332, "tokens_seen": 1422655488 }, { "epoch": 0.43, "learning_rate": 0.0005745466217300594, "loss": 0.0693, "theoretical_loss": 3.5610220714122827, "tokens_seen": 1422917632 }, { "epoch": 0.43, "learning_rate": 0.000574466377788477, "loss": 0.0698, "theoretical_loss": 3.560964115017791, "tokens_seen": 1423179776 }, { "epoch": 0.43, "learning_rate": 0.0005743861338468946, "loss": 0.07, "theoretical_loss": 3.560906172286118, "tokens_seen": 1423441920 }, { "epoch": 0.43, "learning_rate": 0.0005743058899053122, "loss": 0.0703, "theoretical_loss": 3.5608482432115265, "tokens_seen": 1423704064 }, { "epoch": 0.43, "learning_rate": 0.0005742256459637297, "loss": 0.0699, "theoretical_loss": 3.5607903277882853, "tokens_seen": 1423966208 }, { "epoch": 0.43, "learning_rate": 0.0005741454020221474, "loss": 0.0712, "theoretical_loss": 3.560732426010664, "tokens_seen": 1424228352 }, { "epoch": 0.43, "learning_rate": 0.0005740651580805649, "loss": 0.0726, "theoretical_loss": 3.560674537872937, "tokens_seen": 1424490496 }, { "epoch": 0.43, "learning_rate": 0.0005739849141389825, "loss": 0.0693, "theoretical_loss": 3.560616663369382, "tokens_seen": 1424752640 }, { "epoch": 0.43, "learning_rate": 0.0005739046701974002, "loss": 0.0704, "theoretical_loss": 3.5605588024942803, "tokens_seen": 1425014784 }, { "epoch": 0.43, "learning_rate": 0.0005738244262558177, "loss": 0.0706, "theoretical_loss": 3.560500955241916, "tokens_seen": 1425276928 }, { "epoch": 0.43, "learning_rate": 0.0005737441823142353, "loss": 0.0714, "theoretical_loss": 3.5604431216065775, "tokens_seen": 1425539072 }, { "epoch": 0.43, "learning_rate": 0.0005736639383726529, "loss": 0.0704, "theoretical_loss": 3.5603853015825564, "tokens_seen": 1425801216 }, { "epoch": 0.43, "learning_rate": 0.0005735836944310705, "loss": 0.0693, "theoretical_loss": 3.560327495164147, "tokens_seen": 1426063360 }, { "epoch": 0.43, "learning_rate": 0.000573503450489488, "loss": 0.0681, "theoretical_loss": 3.5602697023456473, "tokens_seen": 1426325504 }, { "epoch": 0.43, "learning_rate": 0.0005734232065479057, "loss": 0.0706, "theoretical_loss": 3.5602119231213605, "tokens_seen": 1426587648 }, { "epoch": 0.43, "learning_rate": 0.0005733429626063232, "loss": 0.0703, "theoretical_loss": 3.5601541574855906, "tokens_seen": 1426849792 }, { "epoch": 0.43, "learning_rate": 0.0005732627186647407, "loss": 0.071, "theoretical_loss": 3.5600964054326463, "tokens_seen": 1427111936 }, { "epoch": 0.43, "learning_rate": 0.0005731824747231584, "loss": 0.0721, "theoretical_loss": 3.5600386669568405, "tokens_seen": 1427374080 }, { "epoch": 0.43, "learning_rate": 0.000573102230781576, "loss": 0.0753, "theoretical_loss": 3.559980942052488, "tokens_seen": 1427636224 }, { "epoch": 0.43, "learning_rate": 0.0005730219868399937, "loss": 0.0698, "theoretical_loss": 3.559923230713907, "tokens_seen": 1427898368 }, { "epoch": 0.43, "learning_rate": 0.0005729417428984112, "loss": 0.0702, "theoretical_loss": 3.5598655329354214, "tokens_seen": 1428160512 }, { "epoch": 0.43, "learning_rate": 0.0005728614989568288, "loss": 0.0721, "theoretical_loss": 3.5598078487113556, "tokens_seen": 1428422656 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.0015544474590569735, "objective/train/docs_used": 521864, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4286561012268066, "objective/train/original_loss": 1.4286558628082275, "objective/train/theoretical_loss": 3.55975017803604, "objective/train/tokens_used": 1449144800, "objective/train/value_avg": -0.00952911376953125, "objective/train/value_loss": 0.0003103648195974529, "objective/train/value_max": -4.83393669128418e-05, "objective/train/value_min": -0.6337890625, "objective/train/value_reward_corr": 0.6740575950147936, "objective/train/value_std": 0.0167236328125, "objective/train/weight_avg": 1.0016885995864868, "objective/train/weighted_lm_loss": 1.430799126625061, "objective/train/weights_max": 1.1823221445083618, "objective/train/weights_min": 0.3692566156387329, "theoretical_loss": 3.55975017803604, "tokens_seen": 1428684800 }, { "epoch": 0.43, "learning_rate": 0.0005727812550152464, "loss": 0.0732, "theoretical_loss": 3.55975017803604, "tokens_seen": 1428684800 }, { "epoch": 0.43, "learning_rate": 0.0005727010110736639, "loss": 0.0752, "theoretical_loss": 3.5596925209038055, "tokens_seen": 1428946944 }, { "epoch": 0.43, "learning_rate": 0.0005726207671320815, "loss": 0.072, "theoretical_loss": 3.55963487730899, "tokens_seen": 1429209088 }, { "epoch": 0.43, "learning_rate": 0.0005725405231904991, "loss": 0.0698, "theoretical_loss": 3.5595772472459313, "tokens_seen": 1429471232 }, { "epoch": 0.43, "learning_rate": 0.0005724602792489167, "loss": 0.0707, "theoretical_loss": 3.559519630708973, "tokens_seen": 1429733376 }, { "epoch": 0.43, "learning_rate": 0.0005723800353073343, "loss": 0.0711, "theoretical_loss": 3.5594620276924607, "tokens_seen": 1429995520 }, { "epoch": 0.43, "learning_rate": 0.000572299791365752, "loss": 0.0703, "theoretical_loss": 3.559404438190745, "tokens_seen": 1430257664 }, { "epoch": 0.43, "learning_rate": 0.0005722195474241695, "loss": 0.0706, "theoretical_loss": 3.559346862198178, "tokens_seen": 1430519808 }, { "epoch": 0.43, "learning_rate": 0.000572139303482587, "loss": 0.0717, "theoretical_loss": 3.559289299709116, "tokens_seen": 1430781952 }, { "epoch": 0.43, "learning_rate": 0.0005720590595410047, "loss": 0.0709, "theoretical_loss": 3.5592317507179194, "tokens_seen": 1431044096 }, { "epoch": 0.43, "learning_rate": 0.0005719788155994222, "loss": 0.072, "theoretical_loss": 3.5591742152189507, "tokens_seen": 1431306240 }, { "epoch": 0.43, "learning_rate": 0.0005718985716578399, "loss": 0.0694, "theoretical_loss": 3.559116693206577, "tokens_seen": 1431568384 }, { "epoch": 0.43, "learning_rate": 0.0005718183277162574, "loss": 0.0735, "theoretical_loss": 3.5590591846751685, "tokens_seen": 1431830528 }, { "epoch": 0.43, "learning_rate": 0.000571738083774675, "loss": 0.07, "theoretical_loss": 3.5590016896190977, "tokens_seen": 1432092672 }, { "epoch": 0.43, "learning_rate": 0.0005716578398330927, "loss": 0.07, "theoretical_loss": 3.5589442080327416, "tokens_seen": 1432354816 }, { "epoch": 0.43, "learning_rate": 0.0005715775958915102, "loss": 0.071, "theoretical_loss": 3.5588867399104798, "tokens_seen": 1432616960 }, { "epoch": 0.43, "learning_rate": 0.0005714973519499278, "loss": 0.0718, "theoretical_loss": 3.5588292852466967, "tokens_seen": 1432879104 }, { "epoch": 0.43, "learning_rate": 0.0005714171080083454, "loss": 0.0671, "theoretical_loss": 3.558771844035779, "tokens_seen": 1433141248 }, { "epoch": 0.43, "learning_rate": 0.000571336864066763, "loss": 0.0681, "theoretical_loss": 3.5587144162721156, "tokens_seen": 1433403392 }, { "epoch": 0.43, "learning_rate": 0.0005712566201251805, "loss": 0.0687, "theoretical_loss": 3.5586570019501016, "tokens_seen": 1433665536 }, { "epoch": 0.43, "learning_rate": 0.0005711763761835982, "loss": 0.0737, "theoretical_loss": 3.558599601064133, "tokens_seen": 1433927680 }, { "epoch": 0.43, "learning_rate": 0.0005710961322420157, "loss": 0.072, "theoretical_loss": 3.5585422136086104, "tokens_seen": 1434189824 }, { "epoch": 0.43, "learning_rate": 0.0005710158883004333, "loss": 0.0719, "theoretical_loss": 3.558484839577937, "tokens_seen": 1434451968 }, { "epoch": 0.43, "learning_rate": 0.000570935644358851, "loss": 0.0711, "theoretical_loss": 3.55842747896652, "tokens_seen": 1434714112 }, { "epoch": 0.43, "learning_rate": 0.0005708554004172685, "loss": 0.0678, "theoretical_loss": 3.55837013176877, "tokens_seen": 1434976256 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.001079781330190599, "objective/train/docs_used": 524207, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4617466926574707, "objective/train/original_loss": 1.4617464542388916, "objective/train/theoretical_loss": 3.5583127979791005, "objective/train/tokens_used": 1455698400, "objective/train/value_avg": -0.0064697265625, "objective/train/value_loss": 0.00015081166930031031, "objective/train/value_max": -6.657838821411133e-05, "objective/train/value_min": -0.29443359375, "objective/train/value_reward_corr": 0.6869328360522499, "objective/train/value_std": 0.012359619140625, "objective/train/weight_avg": 1.0011464357376099, "objective/train/weighted_lm_loss": 1.463802695274353, "objective/train/weights_max": 1.2564263343811035, "objective/train/weights_min": 0.3687688410282135, "theoretical_loss": 3.5583127979791005, "tokens_seen": 1435238400 }, { "epoch": 0.43, "learning_rate": 0.0005707751564756861, "loss": 0.0698, "theoretical_loss": 3.5583127979791005, "tokens_seen": 1435238400 }, { "epoch": 0.44, "learning_rate": 0.0005706949125341037, "loss": 0.0712, "theoretical_loss": 3.558255477591928, "tokens_seen": 1435500544 }, { "epoch": 0.44, "learning_rate": 0.0005706146685925213, "loss": 0.0694, "theoretical_loss": 3.558198170601674, "tokens_seen": 1435762688 }, { "epoch": 0.44, "learning_rate": 0.0005705344246509389, "loss": 0.0695, "theoretical_loss": 3.558140877002761, "tokens_seen": 1436024832 }, { "epoch": 0.44, "learning_rate": 0.0005704541807093565, "loss": 0.0726, "theoretical_loss": 3.5580835967896167, "tokens_seen": 1436286976 }, { "epoch": 0.44, "learning_rate": 0.000570373936767774, "loss": 0.0696, "theoretical_loss": 3.5580263299566712, "tokens_seen": 1436549120 }, { "epoch": 0.44, "learning_rate": 0.0005702936928261916, "loss": 0.0726, "theoretical_loss": 3.5579690764983587, "tokens_seen": 1436811264 }, { "epoch": 0.44, "learning_rate": 0.0005702134488846092, "loss": 0.0669, "theoretical_loss": 3.557911836409115, "tokens_seen": 1437073408 }, { "epoch": 0.44, "learning_rate": 0.0005701332049430268, "loss": 0.0695, "theoretical_loss": 3.557854609683382, "tokens_seen": 1437335552 }, { "epoch": 0.44, "learning_rate": 0.0005700529610014445, "loss": 0.0694, "theoretical_loss": 3.5577973963156024, "tokens_seen": 1437597696 }, { "epoch": 0.44, "learning_rate": 0.000569972717059862, "loss": 0.0692, "theoretical_loss": 3.557740196300224, "tokens_seen": 1437859840 }, { "epoch": 0.44, "learning_rate": 0.0005698924731182796, "loss": 0.0726, "theoretical_loss": 3.5576830096316963, "tokens_seen": 1438121984 }, { "epoch": 0.44, "learning_rate": 0.0005698122291766972, "loss": 0.0714, "theoretical_loss": 3.5576258363044735, "tokens_seen": 1438384128 }, { "epoch": 0.44, "learning_rate": 0.0005697319852351147, "loss": 0.073, "theoretical_loss": 3.5575686763130117, "tokens_seen": 1438646272 }, { "epoch": 0.44, "learning_rate": 0.0005696517412935323, "loss": 0.0717, "theoretical_loss": 3.5575115296517725, "tokens_seen": 1438908416 }, { "epoch": 0.44, "learning_rate": 0.0005695714973519499, "loss": 0.0707, "theoretical_loss": 3.5574543963152188, "tokens_seen": 1439170560 }, { "epoch": 0.44, "learning_rate": 0.0005694912534103675, "loss": 0.0688, "theoretical_loss": 3.5573972762978174, "tokens_seen": 1439432704 }, { "epoch": 0.44, "learning_rate": 0.000569411009468785, "loss": 0.0742, "theoretical_loss": 3.5573401695940383, "tokens_seen": 1439694848 }, { "epoch": 0.44, "learning_rate": 0.0005693307655272028, "loss": 0.0704, "theoretical_loss": 3.557283076198356, "tokens_seen": 1439956992 }, { "epoch": 0.44, "learning_rate": 0.0005692505215856203, "loss": 0.0725, "theoretical_loss": 3.557225996105246, "tokens_seen": 1440219136 }, { "epoch": 0.44, "learning_rate": 0.0005691702776440379, "loss": 0.0735, "theoretical_loss": 3.5571689293091895, "tokens_seen": 1440481280 }, { "epoch": 0.44, "learning_rate": 0.0005690900337024555, "loss": 0.0693, "theoretical_loss": 3.5571118758046696, "tokens_seen": 1440743424 }, { "epoch": 0.44, "learning_rate": 0.000569009789760873, "loss": 0.0682, "theoretical_loss": 3.5570548355861726, "tokens_seen": 1441005568 }, { "epoch": 0.44, "learning_rate": 0.0005689295458192907, "loss": 0.0733, "theoretical_loss": 3.5569978086481884, "tokens_seen": 1441267712 }, { "epoch": 0.44, "learning_rate": 0.0005688493018777082, "loss": 0.0682, "theoretical_loss": 3.556940794985211, "tokens_seen": 1441529856 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.001043870928697288, "objective/train/docs_used": 526671, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3674421310424805, "objective/train/original_loss": 1.3674421310424805, "objective/train/theoretical_loss": 3.5568837945917364, "objective/train/tokens_used": 1462252000, "objective/train/value_avg": -0.00707244873046875, "objective/train/value_loss": 0.00022587105922866613, "objective/train/value_max": -6.556510925292969e-05, "objective/train/value_min": -0.299560546875, "objective/train/value_reward_corr": 0.6144987543879608, "objective/train/value_std": 0.0108795166015625, "objective/train/weight_avg": 1.001141905784607, "objective/train/weighted_lm_loss": 1.3684946298599243, "objective/train/weights_max": 1.2918591499328613, "objective/train/weights_min": 0.37235790491104126, "theoretical_loss": 3.5568837945917364, "tokens_seen": 1441792000 }, { "epoch": 0.44, "learning_rate": 0.0005687690579361258, "loss": 0.0703, "theoretical_loss": 3.5568837945917364, "tokens_seen": 1441792000 }, { "epoch": 0.44, "learning_rate": 0.0005686888139945435, "loss": 0.0734, "theoretical_loss": 3.556826807462264, "tokens_seen": 1442054144 }, { "epoch": 0.44, "learning_rate": 0.000568608570052961, "loss": 0.0711, "theoretical_loss": 3.5567698335912983, "tokens_seen": 1442316288 }, { "epoch": 0.44, "learning_rate": 0.0005685283261113786, "loss": 0.0725, "theoretical_loss": 3.5567128729733444, "tokens_seen": 1442578432 }, { "epoch": 0.44, "learning_rate": 0.0005684480821697962, "loss": 0.0701, "theoretical_loss": 3.5566559256029118, "tokens_seen": 1442840576 }, { "epoch": 0.44, "learning_rate": 0.0005683678382282138, "loss": 0.0688, "theoretical_loss": 3.556598991474515, "tokens_seen": 1443102720 }, { "epoch": 0.44, "learning_rate": 0.0005682875942866313, "loss": 0.071, "theoretical_loss": 3.556542070582669, "tokens_seen": 1443364864 }, { "epoch": 0.44, "learning_rate": 0.000568207350345049, "loss": 0.0706, "theoretical_loss": 3.5564851629218928, "tokens_seen": 1443627008 }, { "epoch": 0.44, "learning_rate": 0.0005681271064034665, "loss": 0.0688, "theoretical_loss": 3.55642826848671, "tokens_seen": 1443889152 }, { "epoch": 0.44, "learning_rate": 0.000568046862461884, "loss": 0.0681, "theoretical_loss": 3.5563713872716467, "tokens_seen": 1444151296 }, { "epoch": 0.44, "learning_rate": 0.0005679666185203018, "loss": 0.0719, "theoretical_loss": 3.5563145192712318, "tokens_seen": 1444413440 }, { "epoch": 0.44, "learning_rate": 0.0005678863745787193, "loss": 0.0722, "theoretical_loss": 3.5562576644799977, "tokens_seen": 1444675584 }, { "epoch": 0.44, "learning_rate": 0.000567806130637137, "loss": 0.0719, "theoretical_loss": 3.5562008228924804, "tokens_seen": 1444937728 }, { "epoch": 0.44, "learning_rate": 0.0005677258866955545, "loss": 0.07, "theoretical_loss": 3.556143994503219, "tokens_seen": 1445199872 }, { "epoch": 0.44, "learning_rate": 0.0005676456427539721, "loss": 0.0672, "theoretical_loss": 3.5560871793067554, "tokens_seen": 1445462016 }, { "epoch": 0.44, "learning_rate": 0.0005675653988123897, "loss": 0.0695, "theoretical_loss": 3.556030377297635, "tokens_seen": 1445724160 }, { "epoch": 0.44, "learning_rate": 0.0005674851548708073, "loss": 0.0715, "theoretical_loss": 3.5559735884704073, "tokens_seen": 1445986304 }, { "epoch": 0.44, "learning_rate": 0.0005674049109292248, "loss": 0.0703, "theoretical_loss": 3.5559168128196235, "tokens_seen": 1446248448 }, { "epoch": 0.44, "learning_rate": 0.0005673246669876424, "loss": 0.0698, "theoretical_loss": 3.555860050339839, "tokens_seen": 1446510592 }, { "epoch": 0.44, "learning_rate": 0.00056724442304606, "loss": 0.0705, "theoretical_loss": 3.555803301025613, "tokens_seen": 1446772736 }, { "epoch": 0.44, "learning_rate": 0.0005671641791044776, "loss": 0.0694, "theoretical_loss": 3.555746564871506, "tokens_seen": 1447034880 }, { "epoch": 0.44, "learning_rate": 0.0005670839351628953, "loss": 0.068, "theoretical_loss": 3.5556898418720837, "tokens_seen": 1447297024 }, { "epoch": 0.44, "learning_rate": 0.0005670036912213128, "loss": 0.0674, "theoretical_loss": 3.555633132021914, "tokens_seen": 1447559168 }, { "epoch": 0.44, "learning_rate": 0.0005669234472797304, "loss": 0.07, "theoretical_loss": 3.5555764353155688, "tokens_seen": 1447821312 }, { "epoch": 0.44, "learning_rate": 0.000566843203338148, "loss": 0.07, "theoretical_loss": 3.555519751747622, "tokens_seen": 1448083456 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0011815315810963511, "objective/train/docs_used": 529166, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4328693151474, "objective/train/original_loss": 1.4328691959381104, "objective/train/theoretical_loss": 3.5554630813126513, "objective/train/tokens_used": 1468805600, "objective/train/value_avg": -0.00795745849609375, "objective/train/value_loss": 0.0003213270101696253, "objective/train/value_max": -9.685754776000977e-05, "objective/train/value_min": -0.9267578125, "objective/train/value_reward_corr": 0.7253755597865478, "objective/train/value_std": 0.0184173583984375, "objective/train/weight_avg": 1.0013219118118286, "objective/train/weighted_lm_loss": 1.4348268508911133, "objective/train/weights_max": 1.3883624076843262, "objective/train/weights_min": 0.3690347969532013, "theoretical_loss": 3.5554630813126513, "tokens_seen": 1448345600 }, { "epoch": 0.44, "learning_rate": 0.0005667629593965655, "loss": 0.0709, "theoretical_loss": 3.5554630813126513, "tokens_seen": 1448345600 }, { "epoch": 0.44, "learning_rate": 0.0005666827154549832, "loss": 0.0718, "theoretical_loss": 3.5554064240052385, "tokens_seen": 1448607744 }, { "epoch": 0.44, "learning_rate": 0.0005666024715134007, "loss": 0.0695, "theoretical_loss": 3.5553497798199674, "tokens_seen": 1448869888 }, { "epoch": 0.44, "learning_rate": 0.0005665222275718183, "loss": 0.0723, "theoretical_loss": 3.555293148751426, "tokens_seen": 1449132032 }, { "epoch": 0.44, "learning_rate": 0.000566441983630236, "loss": 0.0721, "theoretical_loss": 3.555236530794204, "tokens_seen": 1449394176 }, { "epoch": 0.44, "learning_rate": 0.0005663617396886536, "loss": 0.07, "theoretical_loss": 3.5551799259428964, "tokens_seen": 1449656320 }, { "epoch": 0.44, "learning_rate": 0.0005662814957470711, "loss": 0.0696, "theoretical_loss": 3.5551233341920994, "tokens_seen": 1449918464 }, { "epoch": 0.44, "learning_rate": 0.0005662012518054887, "loss": 0.0717, "theoretical_loss": 3.555066755536414, "tokens_seen": 1450180608 }, { "epoch": 0.44, "learning_rate": 0.0005661210078639063, "loss": 0.0736, "theoretical_loss": 3.555010189970443, "tokens_seen": 1450442752 }, { "epoch": 0.44, "learning_rate": 0.0005660407639223238, "loss": 0.0681, "theoretical_loss": 3.5549536374887936, "tokens_seen": 1450704896 }, { "epoch": 0.44, "learning_rate": 0.0005659605199807415, "loss": 0.0708, "theoretical_loss": 3.5548970980860757, "tokens_seen": 1450967040 }, { "epoch": 0.44, "learning_rate": 0.000565880276039159, "loss": 0.0697, "theoretical_loss": 3.5548405717569023, "tokens_seen": 1451229184 }, { "epoch": 0.44, "learning_rate": 0.0005658000320975766, "loss": 0.0719, "theoretical_loss": 3.5547840584958896, "tokens_seen": 1451491328 }, { "epoch": 0.44, "learning_rate": 0.0005657197881559943, "loss": 0.0725, "theoretical_loss": 3.554727558297657, "tokens_seen": 1451753472 }, { "epoch": 0.44, "learning_rate": 0.0005656395442144118, "loss": 0.0701, "theoretical_loss": 3.554671071156828, "tokens_seen": 1452015616 }, { "epoch": 0.44, "learning_rate": 0.0005655593002728294, "loss": 0.0707, "theoretical_loss": 3.554614597068027, "tokens_seen": 1452277760 }, { "epoch": 0.44, "learning_rate": 0.000565479056331247, "loss": 0.0716, "theoretical_loss": 3.554558136025884, "tokens_seen": 1452539904 }, { "epoch": 0.44, "learning_rate": 0.0005653988123896646, "loss": 0.072, "theoretical_loss": 3.554501688025031, "tokens_seen": 1452802048 }, { "epoch": 0.44, "learning_rate": 0.0005653185684480822, "loss": 0.0703, "theoretical_loss": 3.554445253060103, "tokens_seen": 1453064192 }, { "epoch": 0.44, "learning_rate": 0.0005652383245064998, "loss": 0.07, "theoretical_loss": 3.5543888311257397, "tokens_seen": 1453326336 }, { "epoch": 0.44, "learning_rate": 0.0005651580805649173, "loss": 0.0716, "theoretical_loss": 3.5543324222165813, "tokens_seen": 1453588480 }, { "epoch": 0.44, "learning_rate": 0.0005650778366233349, "loss": 0.0693, "theoretical_loss": 3.554276026327274, "tokens_seen": 1453850624 }, { "epoch": 0.44, "learning_rate": 0.0005649975926817526, "loss": 0.0715, "theoretical_loss": 3.5542196434524653, "tokens_seen": 1454112768 }, { "epoch": 0.44, "learning_rate": 0.0005649173487401701, "loss": 0.0721, "theoretical_loss": 3.554163273586806, "tokens_seen": 1454374912 }, { "epoch": 0.44, "learning_rate": 0.0005648371047985878, "loss": 0.0696, "theoretical_loss": 3.554106916724951, "tokens_seen": 1454637056 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0007077401387505233, "objective/train/docs_used": 531642, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3581182956695557, "objective/train/original_loss": 1.3581182956695557, "objective/train/theoretical_loss": 3.5540505728615583, "objective/train/tokens_used": 1475359200, "objective/train/value_avg": -0.006244659423828125, "objective/train/value_loss": 0.0002848842123057693, "objective/train/value_max": -7.599592208862305e-05, "objective/train/value_min": -0.51806640625, "objective/train/value_reward_corr": 0.5166717049484271, "objective/train/value_std": 0.01073455810546875, "objective/train/weight_avg": 1.0008255243301392, "objective/train/weighted_lm_loss": 1.3589487075805664, "objective/train/weights_max": 1.625488519668579, "objective/train/weights_min": 0.22773942351341248, "theoretical_loss": 3.5540505728615583, "tokens_seen": 1454899200 }, { "epoch": 0.44, "learning_rate": 0.0005647568608570053, "loss": 0.0699, "theoretical_loss": 3.5540505728615583, "tokens_seen": 1454899200 }, { "epoch": 0.44, "learning_rate": 0.0005646766169154229, "loss": 0.0704, "theoretical_loss": 3.5539942419912878, "tokens_seen": 1455161344 }, { "epoch": 0.44, "learning_rate": 0.0005645963729738405, "loss": 0.0703, "theoretical_loss": 3.553937924108804, "tokens_seen": 1455423488 }, { "epoch": 0.44, "learning_rate": 0.0005645161290322581, "loss": 0.0687, "theoretical_loss": 3.5538816192087728, "tokens_seen": 1455685632 }, { "epoch": 0.44, "learning_rate": 0.0005644358850906756, "loss": 0.0693, "theoretical_loss": 3.5538253272858658, "tokens_seen": 1455947776 }, { "epoch": 0.44, "learning_rate": 0.0005643556411490932, "loss": 0.07, "theoretical_loss": 3.5537690483347557, "tokens_seen": 1456209920 }, { "epoch": 0.44, "learning_rate": 0.0005642753972075108, "loss": 0.0681, "theoretical_loss": 3.5537127823501184, "tokens_seen": 1456472064 }, { "epoch": 0.44, "learning_rate": 0.0005641951532659285, "loss": 0.0694, "theoretical_loss": 3.5536565293266342, "tokens_seen": 1456734208 }, { "epoch": 0.44, "learning_rate": 0.0005641149093243461, "loss": 0.0754, "theoretical_loss": 3.553600289258986, "tokens_seen": 1456996352 }, { "epoch": 0.44, "learning_rate": 0.0005640346653827636, "loss": 0.0712, "theoretical_loss": 3.553544062141859, "tokens_seen": 1457258496 }, { "epoch": 0.44, "learning_rate": 0.0005639544214411813, "loss": 0.0704, "theoretical_loss": 3.5534878479699423, "tokens_seen": 1457520640 }, { "epoch": 0.44, "learning_rate": 0.0005638741774995988, "loss": 0.073, "theoretical_loss": 3.5534316467379288, "tokens_seen": 1457782784 }, { "epoch": 0.44, "learning_rate": 0.0005637939335580163, "loss": 0.0727, "theoretical_loss": 3.5533754584405126, "tokens_seen": 1458044928 }, { "epoch": 0.44, "learning_rate": 0.000563713689616434, "loss": 0.0697, "theoretical_loss": 3.553319283072393, "tokens_seen": 1458307072 }, { "epoch": 0.44, "learning_rate": 0.0005636334456748515, "loss": 0.0725, "theoretical_loss": 3.553263120628271, "tokens_seen": 1458569216 }, { "epoch": 0.44, "learning_rate": 0.0005635532017332691, "loss": 0.0706, "theoretical_loss": 3.553206971102852, "tokens_seen": 1458831360 }, { "epoch": 0.44, "learning_rate": 0.0005634729577916868, "loss": 0.0699, "theoretical_loss": 3.5531508344908436, "tokens_seen": 1459093504 }, { "epoch": 0.44, "learning_rate": 0.0005633927138501044, "loss": 0.0711, "theoretical_loss": 3.5530947107869557, "tokens_seen": 1459355648 }, { "epoch": 0.44, "learning_rate": 0.0005633124699085219, "loss": 0.0722, "theoretical_loss": 3.5530385999859035, "tokens_seen": 1459617792 }, { "epoch": 0.44, "learning_rate": 0.0005632322259669395, "loss": 0.0709, "theoretical_loss": 3.5529825020824033, "tokens_seen": 1459879936 }, { "epoch": 0.44, "learning_rate": 0.0005631519820253571, "loss": 0.0713, "theoretical_loss": 3.5529264170711756, "tokens_seen": 1460142080 }, { "epoch": 0.44, "learning_rate": 0.0005630717380837746, "loss": 0.0715, "theoretical_loss": 3.552870344946944, "tokens_seen": 1460404224 }, { "epoch": 0.44, "learning_rate": 0.0005629914941421923, "loss": 0.0713, "theoretical_loss": 3.5528142857044345, "tokens_seen": 1460666368 }, { "epoch": 0.44, "learning_rate": 0.0005629112502006098, "loss": 0.0707, "theoretical_loss": 3.5527582393383765, "tokens_seen": 1460928512 }, { "epoch": 0.44, "learning_rate": 0.0005628310062590275, "loss": 0.0727, "theoretical_loss": 3.5527022058435036, "tokens_seen": 1461190656 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0008472163463011384, "objective/train/docs_used": 534190, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4311131238937378, "objective/train/original_loss": 1.4311130046844482, "objective/train/theoretical_loss": 3.552646185214551, "objective/train/tokens_used": 1481912800, "objective/train/value_avg": -0.00859832763671875, "objective/train/value_loss": 0.00034199925721623003, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.50927734375, "objective/train/value_reward_corr": 0.6411651572047227, "objective/train/value_std": 0.01556396484375, "objective/train/weight_avg": 1.00099515914917, "objective/train/weighted_lm_loss": 1.4326586723327637, "objective/train/weights_max": 1.275532603263855, "objective/train/weights_min": 0.24629011750221252, "theoretical_loss": 3.552646185214551, "tokens_seen": 1461452800 }, { "epoch": 0.44, "learning_rate": 0.000562750762317445, "loss": 0.0702, "theoretical_loss": 3.552646185214551, "tokens_seen": 1461452800 }, { "epoch": 0.44, "learning_rate": 0.0005626705183758626, "loss": 0.07, "theoretical_loss": 3.552590177446257, "tokens_seen": 1461714944 }, { "epoch": 0.44, "learning_rate": 0.0005625902744342803, "loss": 0.0701, "theoretical_loss": 3.5525341825333645, "tokens_seen": 1461977088 }, { "epoch": 0.44, "learning_rate": 0.0005625100304926978, "loss": 0.0727, "theoretical_loss": 3.552478200470618, "tokens_seen": 1462239232 }, { "epoch": 0.44, "learning_rate": 0.0005624297865511154, "loss": 0.072, "theoretical_loss": 3.552422231252766, "tokens_seen": 1462501376 }, { "epoch": 0.44, "learning_rate": 0.000562349542609533, "loss": 0.0699, "theoretical_loss": 3.552366274874559, "tokens_seen": 1462763520 }, { "epoch": 0.44, "learning_rate": 0.0005622692986679506, "loss": 0.0707, "theoretical_loss": 3.5523103313307516, "tokens_seen": 1463025664 }, { "epoch": 0.44, "learning_rate": 0.0005621890547263681, "loss": 0.0729, "theoretical_loss": 3.5522544006161016, "tokens_seen": 1463287808 }, { "epoch": 0.44, "learning_rate": 0.0005621088107847857, "loss": 0.0703, "theoretical_loss": 3.5521984827253688, "tokens_seen": 1463549952 }, { "epoch": 0.44, "learning_rate": 0.0005620285668432034, "loss": 0.0686, "theoretical_loss": 3.5521425776533175, "tokens_seen": 1463812096 }, { "epoch": 0.44, "learning_rate": 0.0005619483229016209, "loss": 0.0696, "theoretical_loss": 3.5520866853947135, "tokens_seen": 1464074240 }, { "epoch": 0.44, "learning_rate": 0.0005618680789600386, "loss": 0.0688, "theoretical_loss": 3.5520308059443275, "tokens_seen": 1464336384 }, { "epoch": 0.44, "learning_rate": 0.0005617878350184561, "loss": 0.0723, "theoretical_loss": 3.5519749392969313, "tokens_seen": 1464598528 }, { "epoch": 0.44, "learning_rate": 0.0005617075910768738, "loss": 0.0714, "theoretical_loss": 3.5519190854473006, "tokens_seen": 1464860672 }, { "epoch": 0.44, "learning_rate": 0.0005616273471352913, "loss": 0.0707, "theoretical_loss": 3.5518632443902156, "tokens_seen": 1465122816 }, { "epoch": 0.44, "learning_rate": 0.0005615471031937089, "loss": 0.0703, "theoretical_loss": 3.5518074161204565, "tokens_seen": 1465384960 }, { "epoch": 0.44, "learning_rate": 0.0005614668592521265, "loss": 0.0731, "theoretical_loss": 3.5517516006328096, "tokens_seen": 1465647104 }, { "epoch": 0.44, "learning_rate": 0.000561386615310544, "loss": 0.0709, "theoretical_loss": 3.5516957979220627, "tokens_seen": 1465909248 }, { "epoch": 0.44, "learning_rate": 0.0005613063713689616, "loss": 0.071, "theoretical_loss": 3.551640007983007, "tokens_seen": 1466171392 }, { "epoch": 0.44, "learning_rate": 0.0005612261274273793, "loss": 0.0683, "theoretical_loss": 3.551584230810436, "tokens_seen": 1466433536 }, { "epoch": 0.44, "learning_rate": 0.0005611458834857969, "loss": 0.0708, "theoretical_loss": 3.551528466399148, "tokens_seen": 1466695680 }, { "epoch": 0.44, "learning_rate": 0.0005610656395442144, "loss": 0.0711, "theoretical_loss": 3.551472714743942, "tokens_seen": 1466957824 }, { "epoch": 0.44, "learning_rate": 0.0005609853956026321, "loss": 0.069, "theoretical_loss": 3.551416975839623, "tokens_seen": 1467219968 }, { "epoch": 0.44, "learning_rate": 0.0005609051516610496, "loss": 0.0699, "theoretical_loss": 3.551361249680996, "tokens_seen": 1467482112 }, { "epoch": 0.44, "learning_rate": 0.0005608249077194671, "loss": 0.069, "theoretical_loss": 3.5513055362628707, "tokens_seen": 1467744256 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0009279769728891551, "objective/train/docs_used": 535989, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4145874977111816, "objective/train/original_loss": 1.4145876169204712, "objective/train/theoretical_loss": 3.5512498355800597, "objective/train/tokens_used": 1488466400, "objective/train/value_avg": -0.00853729248046875, "objective/train/value_loss": 0.0003833299851976335, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.701171875, "objective/train/value_reward_corr": 0.6353014795865455, "objective/train/value_std": 0.017333984375, "objective/train/weight_avg": 1.0010976791381836, "objective/train/weighted_lm_loss": 1.4164438247680664, "objective/train/weights_max": 1.698567271232605, "objective/train/weights_min": 0.36841312050819397, "theoretical_loss": 3.5512498355800597, "tokens_seen": 1468006400 }, { "epoch": 0.44, "learning_rate": 0.0005607446637778848, "loss": 0.0726, "theoretical_loss": 3.5512498355800597, "tokens_seen": 1468006400 }, { "epoch": 0.44, "learning_rate": 0.0005606644198363023, "loss": 0.0696, "theoretical_loss": 3.5511941476273785, "tokens_seen": 1468268544 }, { "epoch": 0.45, "learning_rate": 0.0005605841758947199, "loss": 0.0688, "theoretical_loss": 3.551138472399646, "tokens_seen": 1468530688 }, { "epoch": 0.45, "learning_rate": 0.0005605039319531376, "loss": 0.0703, "theoretical_loss": 3.5510828098916836, "tokens_seen": 1468792832 }, { "epoch": 0.45, "learning_rate": 0.0005604236880115552, "loss": 0.0705, "theoretical_loss": 3.5510271600983154, "tokens_seen": 1469054976 }, { "epoch": 0.45, "learning_rate": 0.0005603434440699728, "loss": 0.0702, "theoretical_loss": 3.5509715230143692, "tokens_seen": 1469317120 }, { "epoch": 0.45, "learning_rate": 0.0005602632001283903, "loss": 0.0702, "theoretical_loss": 3.550915898634676, "tokens_seen": 1469579264 }, { "epoch": 0.45, "learning_rate": 0.0005601829561868079, "loss": 0.0678, "theoretical_loss": 3.550860286954069, "tokens_seen": 1469841408 }, { "epoch": 0.45, "learning_rate": 0.0005601027122452255, "loss": 0.068, "theoretical_loss": 3.5508046879673856, "tokens_seen": 1470103552 }, { "epoch": 0.45, "learning_rate": 0.0005600224683036431, "loss": 0.0721, "theoretical_loss": 3.550749101669465, "tokens_seen": 1470365696 }, { "epoch": 0.45, "learning_rate": 0.0005599422243620606, "loss": 0.0678, "theoretical_loss": 3.5506935280551497, "tokens_seen": 1470627840 }, { "epoch": 0.45, "learning_rate": 0.0005598619804204783, "loss": 0.0711, "theoretical_loss": 3.5506379671192865, "tokens_seen": 1470889984 }, { "epoch": 0.45, "learning_rate": 0.0005597817364788959, "loss": 0.0711, "theoretical_loss": 3.550582418856723, "tokens_seen": 1471152128 }, { "epoch": 0.45, "learning_rate": 0.0005597014925373134, "loss": 0.069, "theoretical_loss": 3.550526883262312, "tokens_seen": 1471414272 }, { "epoch": 0.45, "learning_rate": 0.0005596212485957311, "loss": 0.0711, "theoretical_loss": 3.550471360330907, "tokens_seen": 1471676416 }, { "epoch": 0.45, "learning_rate": 0.0005595410046541486, "loss": 0.069, "theoretical_loss": 3.550415850057367, "tokens_seen": 1471938560 }, { "epoch": 0.45, "learning_rate": 0.0005594607607125662, "loss": 0.0695, "theoretical_loss": 3.5503603524365523, "tokens_seen": 1472200704 }, { "epoch": 0.45, "learning_rate": 0.0005593805167709838, "loss": 0.0735, "theoretical_loss": 3.5503048674633266, "tokens_seen": 1472462848 }, { "epoch": 0.45, "learning_rate": 0.0005593002728294014, "loss": 0.0694, "theoretical_loss": 3.5502493951325564, "tokens_seen": 1472724992 }, { "epoch": 0.45, "learning_rate": 0.0005592200288878189, "loss": 0.0739, "theoretical_loss": 3.550193935439112, "tokens_seen": 1472987136 }, { "epoch": 0.45, "learning_rate": 0.0005591397849462365, "loss": 0.0697, "theoretical_loss": 3.5501384883778666, "tokens_seen": 1473249280 }, { "epoch": 0.45, "learning_rate": 0.0005590595410046541, "loss": 0.0729, "theoretical_loss": 3.5500830539436956, "tokens_seen": 1473511424 }, { "epoch": 0.45, "learning_rate": 0.0005589792970630718, "loss": 0.0692, "theoretical_loss": 3.550027632131477, "tokens_seen": 1473773568 }, { "epoch": 0.45, "learning_rate": 0.0005588990531214894, "loss": 0.0708, "theoretical_loss": 3.549972222936094, "tokens_seen": 1474035712 }, { "epoch": 0.45, "learning_rate": 0.0005588188091799069, "loss": 0.0696, "theoretical_loss": 3.5499168263524297, "tokens_seen": 1474297856 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.000858014915138483, "objective/train/docs_used": 538469, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.343380331993103, "objective/train/original_loss": 1.343380331993103, "objective/train/theoretical_loss": 3.549861442375373, "objective/train/tokens_used": 1495020000, "objective/train/value_avg": -0.0080413818359375, "objective/train/value_loss": 0.00021646420645993203, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.485107421875, "objective/train/value_reward_corr": 0.6710323577309563, "objective/train/value_std": 0.015411376953125, "objective/train/weight_avg": 1.0009558200836182, "objective/train/weighted_lm_loss": 1.3450497388839722, "objective/train/weights_max": 1.3448151350021362, "objective/train/weights_min": 0.36848655343055725, "theoretical_loss": 3.549861442375373, "tokens_seen": 1474560000 }, { "epoch": 0.45, "learning_rate": 0.0005587385652383246, "loss": 0.0717, "theoretical_loss": 3.549861442375373, "tokens_seen": 1474560000 }, { "epoch": 0.45, "learning_rate": 0.0005586583212967421, "loss": 0.0681, "theoretical_loss": 3.5498060709998143, "tokens_seen": 1474822144 }, { "epoch": 0.45, "learning_rate": 0.0005585780773551596, "loss": 0.071, "theoretical_loss": 3.5497507122206473, "tokens_seen": 1475084288 }, { "epoch": 0.45, "learning_rate": 0.0005584978334135773, "loss": 0.0705, "theoretical_loss": 3.5496953660327684, "tokens_seen": 1475346432 }, { "epoch": 0.45, "learning_rate": 0.0005584175894719948, "loss": 0.0705, "theoretical_loss": 3.5496400324310775, "tokens_seen": 1475608576 }, { "epoch": 0.45, "learning_rate": 0.0005583373455304124, "loss": 0.0668, "theoretical_loss": 3.5495847114104766, "tokens_seen": 1475870720 }, { "epoch": 0.45, "learning_rate": 0.0005582571015888301, "loss": 0.0721, "theoretical_loss": 3.549529402965873, "tokens_seen": 1476132864 }, { "epoch": 0.45, "learning_rate": 0.0005581768576472477, "loss": 0.0699, "theoretical_loss": 3.549474107092173, "tokens_seen": 1476395008 }, { "epoch": 0.45, "learning_rate": 0.0005580966137056652, "loss": 0.0734, "theoretical_loss": 3.5494188237842894, "tokens_seen": 1476657152 }, { "epoch": 0.45, "learning_rate": 0.0005580163697640829, "loss": 0.0716, "theoretical_loss": 3.5493635530371366, "tokens_seen": 1476919296 }, { "epoch": 0.45, "learning_rate": 0.0005579361258225004, "loss": 0.0694, "theoretical_loss": 3.5493082948456314, "tokens_seen": 1477181440 }, { "epoch": 0.45, "learning_rate": 0.0005578558818809179, "loss": 0.0722, "theoretical_loss": 3.549253049204695, "tokens_seen": 1477443584 }, { "epoch": 0.45, "learning_rate": 0.0005577756379393356, "loss": 0.0679, "theoretical_loss": 3.549197816109251, "tokens_seen": 1477705728 }, { "epoch": 0.45, "learning_rate": 0.0005576953939977531, "loss": 0.0693, "theoretical_loss": 3.549142595554224, "tokens_seen": 1477967872 }, { "epoch": 0.45, "learning_rate": 0.0005576151500561709, "loss": 0.0721, "theoretical_loss": 3.5490873875345446, "tokens_seen": 1478230016 }, { "epoch": 0.45, "learning_rate": 0.0005575349061145884, "loss": 0.0691, "theoretical_loss": 3.5490321920451446, "tokens_seen": 1478492160 }, { "epoch": 0.45, "learning_rate": 0.000557454662173006, "loss": 0.0703, "theoretical_loss": 3.54897700908096, "tokens_seen": 1478754304 }, { "epoch": 0.45, "learning_rate": 0.0005573744182314236, "loss": 0.0725, "theoretical_loss": 3.548921838636927, "tokens_seen": 1479016448 }, { "epoch": 0.45, "learning_rate": 0.0005572941742898411, "loss": 0.0726, "theoretical_loss": 3.5488666807079885, "tokens_seen": 1479278592 }, { "epoch": 0.45, "learning_rate": 0.0005572139303482587, "loss": 0.0703, "theoretical_loss": 3.5488115352890874, "tokens_seen": 1479540736 }, { "epoch": 0.45, "learning_rate": 0.0005571336864066763, "loss": 0.071, "theoretical_loss": 3.5487564023751714, "tokens_seen": 1479802880 }, { "epoch": 0.45, "learning_rate": 0.0005570534424650939, "loss": 0.0715, "theoretical_loss": 3.5487012819611894, "tokens_seen": 1480065024 }, { "epoch": 0.45, "learning_rate": 0.0005569731985235114, "loss": 0.0716, "theoretical_loss": 3.548646174042095, "tokens_seen": 1480327168 }, { "epoch": 0.45, "learning_rate": 0.0005568929545819291, "loss": 0.0727, "theoretical_loss": 3.5485910786128434, "tokens_seen": 1480589312 }, { "epoch": 0.45, "learning_rate": 0.0005568127106403467, "loss": 0.0721, "theoretical_loss": 3.5485359956683933, "tokens_seen": 1480851456 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.0011171478545293212, "objective/train/docs_used": 540822, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4887363910675049, "objective/train/original_loss": 1.4887363910675049, "objective/train/theoretical_loss": 3.548480925203706, "objective/train/tokens_used": 1501573600, "objective/train/value_avg": -0.01031494140625, "objective/train/value_loss": 0.0003310543834231794, "objective/train/value_max": -8.749961853027344e-05, "objective/train/value_min": -0.446533203125, "objective/train/value_reward_corr": 0.6941375660042437, "objective/train/value_std": 0.017333984375, "objective/train/weight_avg": 1.0012686252593994, "objective/train/weighted_lm_loss": 1.490714430809021, "objective/train/weights_max": 1.4081617593765259, "objective/train/weights_min": 0.3702827990055084, "theoretical_loss": 3.548480925203706, "tokens_seen": 1481113600 }, { "epoch": 0.45, "learning_rate": 0.0005567324666987642, "loss": 0.0724, "theoretical_loss": 3.548480925203706, "tokens_seen": 1481113600 }, { "epoch": 0.45, "learning_rate": 0.0005566522227571819, "loss": 0.0711, "theoretical_loss": 3.548425867213747, "tokens_seen": 1481375744 }, { "epoch": 0.45, "learning_rate": 0.0005565719788155994, "loss": 0.0707, "theoretical_loss": 3.5483708216934833, "tokens_seen": 1481637888 }, { "epoch": 0.45, "learning_rate": 0.0005564917348740171, "loss": 0.0705, "theoretical_loss": 3.5483157886378844, "tokens_seen": 1481900032 }, { "epoch": 0.45, "learning_rate": 0.0005564114909324346, "loss": 0.0725, "theoretical_loss": 3.5482607680419243, "tokens_seen": 1482162176 }, { "epoch": 0.45, "learning_rate": 0.0005563312469908522, "loss": 0.0706, "theoretical_loss": 3.548205759900579, "tokens_seen": 1482424320 }, { "epoch": 0.45, "learning_rate": 0.0005562510030492698, "loss": 0.0706, "theoretical_loss": 3.548150764208828, "tokens_seen": 1482686464 }, { "epoch": 0.45, "learning_rate": 0.0005561707591076873, "loss": 0.0699, "theoretical_loss": 3.5480957809616527, "tokens_seen": 1482948608 }, { "epoch": 0.45, "learning_rate": 0.000556090515166105, "loss": 0.0725, "theoretical_loss": 3.548040810154038, "tokens_seen": 1483210752 }, { "epoch": 0.45, "learning_rate": 0.0005560102712245226, "loss": 0.0741, "theoretical_loss": 3.5479858517809717, "tokens_seen": 1483472896 }, { "epoch": 0.45, "learning_rate": 0.0005559300272829402, "loss": 0.0722, "theoretical_loss": 3.547930905837445, "tokens_seen": 1483735040 }, { "epoch": 0.45, "learning_rate": 0.0005558497833413577, "loss": 0.0748, "theoretical_loss": 3.547875972318451, "tokens_seen": 1483997184 }, { "epoch": 0.45, "learning_rate": 0.0005557695393997754, "loss": 0.0691, "theoretical_loss": 3.547821051218987, "tokens_seen": 1484259328 }, { "epoch": 0.45, "learning_rate": 0.0005556892954581929, "loss": 0.071, "theoretical_loss": 3.5477661425340514, "tokens_seen": 1484521472 }, { "epoch": 0.45, "learning_rate": 0.0005556090515166104, "loss": 0.0719, "theoretical_loss": 3.547711246258647, "tokens_seen": 1484783616 }, { "epoch": 0.45, "learning_rate": 0.0005555288075750281, "loss": 0.0731, "theoretical_loss": 3.547656362387779, "tokens_seen": 1485045760 }, { "epoch": 0.45, "learning_rate": 0.0005554485636334456, "loss": 0.0733, "theoretical_loss": 3.5476014909164553, "tokens_seen": 1485307904 }, { "epoch": 0.45, "learning_rate": 0.0005553683196918632, "loss": 0.0697, "theoretical_loss": 3.547546631839687, "tokens_seen": 1485570048 }, { "epoch": 0.45, "learning_rate": 0.0005552880757502809, "loss": 0.0705, "theoretical_loss": 3.547491785152488, "tokens_seen": 1485832192 }, { "epoch": 0.45, "learning_rate": 0.0005552078318086985, "loss": 0.0716, "theoretical_loss": 3.5474369508498755, "tokens_seen": 1486094336 }, { "epoch": 0.45, "learning_rate": 0.0005551275878671161, "loss": 0.0717, "theoretical_loss": 3.547382128926868, "tokens_seen": 1486356480 }, { "epoch": 0.45, "learning_rate": 0.0005550473439255337, "loss": 0.0698, "theoretical_loss": 3.5473273193784896, "tokens_seen": 1486618624 }, { "epoch": 0.45, "learning_rate": 0.0005549670999839512, "loss": 0.0696, "theoretical_loss": 3.547272522199764, "tokens_seen": 1486880768 }, { "epoch": 0.45, "learning_rate": 0.0005548868560423688, "loss": 0.072, "theoretical_loss": 3.5472177373857208, "tokens_seen": 1487142912 }, { "epoch": 0.45, "learning_rate": 0.0005548066121007864, "loss": 0.0682, "theoretical_loss": 3.5471629649313905, "tokens_seen": 1487405056 }, { "epoch": 0.45, "objective/train/advantage_avg": -3.483461114228703e-05, "objective/train/docs_used": 543335, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3849729299545288, "objective/train/original_loss": 1.3849728107452393, "objective/train/theoretical_loss": 3.547108204831807, "objective/train/tokens_used": 1508127200, "objective/train/value_avg": -0.007526397705078125, "objective/train/value_loss": 0.000498010718729347, "objective/train/value_max": -7.253885269165039e-05, "objective/train/value_min": -0.91748046875, "objective/train/value_reward_corr": 0.6365641012412713, "objective/train/value_std": 0.0164794921875, "objective/train/weight_avg": 1.0001769065856934, "objective/train/weighted_lm_loss": 1.3848627805709839, "objective/train/weights_max": 2.075793504714966, "objective/train/weights_min": 0.23170025646686554, "theoretical_loss": 3.547108204831807, "tokens_seen": 1487667200 }, { "epoch": 0.45, "learning_rate": 0.0005547263681592039, "loss": 0.0689, "theoretical_loss": 3.547108204831807, "tokens_seen": 1487667200 }, { "epoch": 0.45, "learning_rate": 0.0005546461242176217, "loss": 0.0726, "theoretical_loss": 3.547053457082008, "tokens_seen": 1487929344 }, { "epoch": 0.45, "learning_rate": 0.0005545658802760392, "loss": 0.0718, "theoretical_loss": 3.5469987216770322, "tokens_seen": 1488191488 }, { "epoch": 0.45, "learning_rate": 0.0005544856363344568, "loss": 0.0724, "theoretical_loss": 3.5469439986119227, "tokens_seen": 1488453632 }, { "epoch": 0.45, "learning_rate": 0.0005544053923928744, "loss": 0.07, "theoretical_loss": 3.5468892878817253, "tokens_seen": 1488715776 }, { "epoch": 0.45, "learning_rate": 0.0005543251484512919, "loss": 0.0714, "theoretical_loss": 3.546834589481488, "tokens_seen": 1488977920 }, { "epoch": 0.45, "learning_rate": 0.0005542449045097095, "loss": 0.07, "theoretical_loss": 3.5467799034062617, "tokens_seen": 1489240064 }, { "epoch": 0.45, "learning_rate": 0.0005541646605681271, "loss": 0.0688, "theoretical_loss": 3.546725229651101, "tokens_seen": 1489502208 }, { "epoch": 0.45, "learning_rate": 0.0005540844166265447, "loss": 0.071, "theoretical_loss": 3.5466705682110633, "tokens_seen": 1489764352 }, { "epoch": 0.45, "learning_rate": 0.0005540041726849623, "loss": 0.0715, "theoretical_loss": 3.546615919081207, "tokens_seen": 1490026496 }, { "epoch": 0.45, "learning_rate": 0.00055392392874338, "loss": 0.0679, "theoretical_loss": 3.546561282256596, "tokens_seen": 1490288640 }, { "epoch": 0.45, "learning_rate": 0.0005538436848017975, "loss": 0.0677, "theoretical_loss": 3.546506657732295, "tokens_seen": 1490550784 }, { "epoch": 0.45, "learning_rate": 0.0005537634408602151, "loss": 0.0712, "theoretical_loss": 3.546452045503372, "tokens_seen": 1490812928 }, { "epoch": 0.45, "learning_rate": 0.0005536831969186327, "loss": 0.0699, "theoretical_loss": 3.5463974455648994, "tokens_seen": 1491075072 }, { "epoch": 0.45, "learning_rate": 0.0005536029529770502, "loss": 0.0696, "theoretical_loss": 3.5463428579119505, "tokens_seen": 1491337216 }, { "epoch": 0.45, "learning_rate": 0.0005535227090354679, "loss": 0.0693, "theoretical_loss": 3.546288282539602, "tokens_seen": 1491599360 }, { "epoch": 0.45, "learning_rate": 0.0005534424650938854, "loss": 0.0701, "theoretical_loss": 3.5462337194429336, "tokens_seen": 1491861504 }, { "epoch": 0.45, "learning_rate": 0.000553362221152303, "loss": 0.0722, "theoretical_loss": 3.546179168617028, "tokens_seen": 1492123648 }, { "epoch": 0.45, "learning_rate": 0.0005532819772107206, "loss": 0.0689, "theoretical_loss": 3.54612463005697, "tokens_seen": 1492385792 }, { "epoch": 0.45, "learning_rate": 0.0005532017332691381, "loss": 0.0731, "theoretical_loss": 3.546070103757849, "tokens_seen": 1492647936 }, { "epoch": 0.45, "learning_rate": 0.0005531214893275557, "loss": 0.0681, "theoretical_loss": 3.546015589714755, "tokens_seen": 1492910080 }, { "epoch": 0.45, "learning_rate": 0.0005530412453859734, "loss": 0.0715, "theoretical_loss": 3.545961087922782, "tokens_seen": 1493172224 }, { "epoch": 0.45, "learning_rate": 0.000552961001444391, "loss": 0.0696, "theoretical_loss": 3.5459065983770266, "tokens_seen": 1493434368 }, { "epoch": 0.45, "learning_rate": 0.0005528807575028085, "loss": 0.0669, "theoretical_loss": 3.5458521210725893, "tokens_seen": 1493696512 }, { "epoch": 0.45, "learning_rate": 0.0005528005135612262, "loss": 0.069, "theoretical_loss": 3.545797656004571, "tokens_seen": 1493958656 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.002028031274676323, "objective/train/docs_used": 545793, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.288960337638855, "objective/train/original_loss": 1.2889604568481445, "objective/train/theoretical_loss": 3.545743203168077, "objective/train/tokens_used": 1514680800, "objective/train/value_avg": -0.0094146728515625, "objective/train/value_loss": 0.00047856621677055955, "objective/train/value_max": -6.973743438720703e-05, "objective/train/value_min": -0.8828125, "objective/train/value_reward_corr": 0.7416871098120547, "objective/train/value_std": 0.0251922607421875, "objective/train/weight_avg": 1.0022516250610352, "objective/train/weighted_lm_loss": 1.2908267974853516, "objective/train/weights_max": 2.2083611488342285, "objective/train/weights_min": 0.36811721324920654, "theoretical_loss": 3.545743203168077, "tokens_seen": 1494220800 }, { "epoch": 0.45, "learning_rate": 0.0005527202696196437, "loss": 0.0683, "theoretical_loss": 3.545743203168077, "tokens_seen": 1494220800 }, { "epoch": 0.45, "learning_rate": 0.0005526400256780613, "loss": 0.0692, "theoretical_loss": 3.545688762558216, "tokens_seen": 1494482944 }, { "epoch": 0.45, "learning_rate": 0.0005525597817364789, "loss": 0.0704, "theoretical_loss": 3.5456343341700984, "tokens_seen": 1494745088 }, { "epoch": 0.45, "learning_rate": 0.0005524795377948964, "loss": 0.07, "theoretical_loss": 3.545579917998838, "tokens_seen": 1495007232 }, { "epoch": 0.45, "learning_rate": 0.0005523992938533142, "loss": 0.0689, "theoretical_loss": 3.5455255140395505, "tokens_seen": 1495269376 }, { "epoch": 0.45, "learning_rate": 0.0005523190499117317, "loss": 0.071, "theoretical_loss": 3.5454711222873554, "tokens_seen": 1495531520 }, { "epoch": 0.45, "learning_rate": 0.0005522388059701493, "loss": 0.0712, "theoretical_loss": 3.545416742737375, "tokens_seen": 1495793664 }, { "epoch": 0.45, "learning_rate": 0.0005521585620285669, "loss": 0.0714, "theoretical_loss": 3.5453623753847343, "tokens_seen": 1496055808 }, { "epoch": 0.45, "learning_rate": 0.0005520783180869845, "loss": 0.0688, "theoretical_loss": 3.54530802022456, "tokens_seen": 1496317952 }, { "epoch": 0.45, "learning_rate": 0.000551998074145402, "loss": 0.0744, "theoretical_loss": 3.545253677251983, "tokens_seen": 1496580096 }, { "epoch": 0.45, "learning_rate": 0.0005519178302038196, "loss": 0.0698, "theoretical_loss": 3.5451993464621365, "tokens_seen": 1496842240 }, { "epoch": 0.45, "learning_rate": 0.0005518375862622372, "loss": 0.0688, "theoretical_loss": 3.5451450278501566, "tokens_seen": 1497104384 }, { "epoch": 0.45, "learning_rate": 0.0005517573423206547, "loss": 0.0736, "theoretical_loss": 3.545090721411182, "tokens_seen": 1497366528 }, { "epoch": 0.45, "learning_rate": 0.0005516770983790724, "loss": 0.0708, "theoretical_loss": 3.545036427140354, "tokens_seen": 1497628672 }, { "epoch": 0.45, "learning_rate": 0.00055159685443749, "loss": 0.0727, "theoretical_loss": 3.544982145032817, "tokens_seen": 1497890816 }, { "epoch": 0.45, "learning_rate": 0.0005515166104959077, "loss": 0.0694, "theoretical_loss": 3.5449278750837188, "tokens_seen": 1498152960 }, { "epoch": 0.45, "learning_rate": 0.0005514363665543252, "loss": 0.0734, "theoretical_loss": 3.5448736172882085, "tokens_seen": 1498415104 }, { "epoch": 0.45, "learning_rate": 0.0005513561226127427, "loss": 0.069, "theoretical_loss": 3.5448193716414393, "tokens_seen": 1498677248 }, { "epoch": 0.45, "learning_rate": 0.0005512758786711604, "loss": 0.071, "theoretical_loss": 3.5447651381385668, "tokens_seen": 1498939392 }, { "epoch": 0.45, "learning_rate": 0.0005511956347295779, "loss": 0.0708, "theoretical_loss": 3.544710916774749, "tokens_seen": 1499201536 }, { "epoch": 0.45, "learning_rate": 0.0005511153907879955, "loss": 0.0701, "theoretical_loss": 3.5446567075451463, "tokens_seen": 1499463680 }, { "epoch": 0.45, "learning_rate": 0.0005510351468464131, "loss": 0.0713, "theoretical_loss": 3.544602510444924, "tokens_seen": 1499725824 }, { "epoch": 0.45, "learning_rate": 0.0005509549029048307, "loss": 0.0701, "theoretical_loss": 3.544548325469247, "tokens_seen": 1499987968 }, { "epoch": 0.45, "learning_rate": 0.0005508746589632483, "loss": 0.0736, "theoretical_loss": 3.5444941526132863, "tokens_seen": 1500250112 }, { "epoch": 0.45, "learning_rate": 0.0005507944150216659, "loss": 0.0725, "theoretical_loss": 3.5444399918722134, "tokens_seen": 1500512256 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.00016552148736082017, "objective/train/docs_used": 548178, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3277549743652344, "objective/train/original_loss": 1.327755093574524, "objective/train/theoretical_loss": 3.5443858432412028, "objective/train/tokens_used": 1521234400, "objective/train/value_avg": -0.0071868896484375, "objective/train/value_loss": 0.00020315258007030934, "objective/train/value_max": -4.988908767700195e-05, "objective/train/value_min": -0.98486328125, "objective/train/value_reward_corr": 0.6999380374007601, "objective/train/value_std": 0.01276397705078125, "objective/train/weight_avg": 1.0002577304840088, "objective/train/weighted_lm_loss": 1.3282614946365356, "objective/train/weights_max": 1.1451897621154785, "objective/train/weights_min": 0.23797065019607544, "theoretical_loss": 3.5443858432412028, "tokens_seen": 1500774400 }, { "epoch": 0.45, "learning_rate": 0.0005507141710800835, "loss": 0.0685, "theoretical_loss": 3.5443858432412028, "tokens_seen": 1500774400 }, { "epoch": 0.45, "learning_rate": 0.000550633927138501, "loss": 0.0691, "theoretical_loss": 3.5443317067154325, "tokens_seen": 1501036544 }, { "epoch": 0.45, "learning_rate": 0.0005505536831969187, "loss": 0.0717, "theoretical_loss": 3.544277582290083, "tokens_seen": 1501298688 }, { "epoch": 0.46, "learning_rate": 0.0005504734392553362, "loss": 0.0713, "theoretical_loss": 3.544223469960337, "tokens_seen": 1501560832 }, { "epoch": 0.46, "learning_rate": 0.0005503931953137538, "loss": 0.0723, "theoretical_loss": 3.5441693697213816, "tokens_seen": 1501822976 }, { "epoch": 0.46, "learning_rate": 0.0005503129513721714, "loss": 0.0712, "theoretical_loss": 3.5441152815684043, "tokens_seen": 1502085120 }, { "epoch": 0.46, "learning_rate": 0.0005502327074305889, "loss": 0.0697, "theoretical_loss": 3.5440612054965968, "tokens_seen": 1502347264 }, { "epoch": 0.46, "learning_rate": 0.0005501524634890067, "loss": 0.0722, "theoretical_loss": 3.544007141501154, "tokens_seen": 1502609408 }, { "epoch": 0.46, "learning_rate": 0.0005500722195474242, "loss": 0.0697, "theoretical_loss": 3.543953089577272, "tokens_seen": 1502871552 }, { "epoch": 0.46, "learning_rate": 0.0005499919756058418, "loss": 0.0717, "theoretical_loss": 3.5438990497201512, "tokens_seen": 1503133696 }, { "epoch": 0.46, "learning_rate": 0.0005499117316642594, "loss": 0.0692, "theoretical_loss": 3.5438450219249935, "tokens_seen": 1503395840 }, { "epoch": 0.46, "learning_rate": 0.000549831487722677, "loss": 0.0688, "theoretical_loss": 3.5437910061870044, "tokens_seen": 1503657984 }, { "epoch": 0.46, "learning_rate": 0.0005497512437810945, "loss": 0.071, "theoretical_loss": 3.543737002501392, "tokens_seen": 1503920128 }, { "epoch": 0.46, "learning_rate": 0.0005496709998395121, "loss": 0.0702, "theoretical_loss": 3.5436830108633663, "tokens_seen": 1504182272 }, { "epoch": 0.46, "learning_rate": 0.0005495907558979297, "loss": 0.0679, "theoretical_loss": 3.5436290312681415, "tokens_seen": 1504444416 }, { "epoch": 0.46, "learning_rate": 0.0005495105119563472, "loss": 0.0685, "theoretical_loss": 3.543575063710933, "tokens_seen": 1504706560 }, { "epoch": 0.46, "learning_rate": 0.000549430268014765, "loss": 0.0702, "theoretical_loss": 3.543521108186961, "tokens_seen": 1504968704 }, { "epoch": 0.46, "learning_rate": 0.0005493500240731825, "loss": 0.0731, "theoretical_loss": 3.543467164691445, "tokens_seen": 1505230848 }, { "epoch": 0.46, "learning_rate": 0.0005492697801316001, "loss": 0.0678, "theoretical_loss": 3.5434132332196113, "tokens_seen": 1505492992 }, { "epoch": 0.46, "learning_rate": 0.0005491895361900177, "loss": 0.0698, "theoretical_loss": 3.543359313766686, "tokens_seen": 1505755136 }, { "epoch": 0.46, "learning_rate": 0.0005491092922484352, "loss": 0.0703, "theoretical_loss": 3.543305406327899, "tokens_seen": 1506017280 }, { "epoch": 0.46, "learning_rate": 0.0005490290483068528, "loss": 0.0682, "theoretical_loss": 3.5432515108984832, "tokens_seen": 1506279424 }, { "epoch": 0.46, "learning_rate": 0.0005489488043652704, "loss": 0.0672, "theoretical_loss": 3.543197627473673, "tokens_seen": 1506541568 }, { "epoch": 0.46, "learning_rate": 0.000548868560423688, "loss": 0.0713, "theoretical_loss": 3.543143756048708, "tokens_seen": 1506803712 }, { "epoch": 0.46, "learning_rate": 0.0005487883164821056, "loss": 0.0692, "theoretical_loss": 3.5430898966188265, "tokens_seen": 1507065856 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.00014458467194344848, "objective/train/docs_used": 550589, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3810093402862549, "objective/train/original_loss": 1.3810093402862549, "objective/train/theoretical_loss": 3.543036049179274, "objective/train/tokens_used": 1527788000, "objective/train/value_avg": -0.0096435546875, "objective/train/value_loss": 0.00022767337213736027, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.396728515625, "objective/train/value_reward_corr": 0.7315385161777496, "objective/train/value_std": 0.01538848876953125, "objective/train/weight_avg": 1.0002518892288208, "objective/train/weighted_lm_loss": 1.3807989358901978, "objective/train/weights_max": 1.3733340501785278, "objective/train/weights_min": 0.3778582811355591, "theoretical_loss": 3.543036049179274, "tokens_seen": 1507328000 }, { "epoch": 0.46, "learning_rate": 0.0005487080725405232, "loss": 0.0707, "theoretical_loss": 3.543036049179274, "tokens_seen": 1507328000 }, { "epoch": 0.46, "learning_rate": 0.0005486278285989408, "loss": 0.073, "theoretical_loss": 3.5429822137252955, "tokens_seen": 1507590144 }, { "epoch": 0.46, "learning_rate": 0.0005485475846573585, "loss": 0.0696, "theoretical_loss": 3.54292839025214, "tokens_seen": 1507852288 }, { "epoch": 0.46, "learning_rate": 0.000548467340715776, "loss": 0.0697, "theoretical_loss": 3.5428745787550593, "tokens_seen": 1508114432 }, { "epoch": 0.46, "learning_rate": 0.0005483870967741935, "loss": 0.0715, "theoretical_loss": 3.5428207792293076, "tokens_seen": 1508376576 }, { "epoch": 0.46, "learning_rate": 0.0005483068528326112, "loss": 0.0698, "theoretical_loss": 3.542766991670142, "tokens_seen": 1508638720 }, { "epoch": 0.46, "learning_rate": 0.0005482266088910287, "loss": 0.0706, "theoretical_loss": 3.542713216072821, "tokens_seen": 1508900864 }, { "epoch": 0.46, "learning_rate": 0.0005481463649494463, "loss": 0.0721, "theoretical_loss": 3.542659452432608, "tokens_seen": 1509163008 }, { "epoch": 0.46, "learning_rate": 0.0005480661210078639, "loss": 0.0705, "theoretical_loss": 3.542605700744768, "tokens_seen": 1509425152 }, { "epoch": 0.46, "learning_rate": 0.0005479858770662815, "loss": 0.0694, "theoretical_loss": 3.542551961004568, "tokens_seen": 1509687296 }, { "epoch": 0.46, "learning_rate": 0.000547905633124699, "loss": 0.0716, "theoretical_loss": 3.5424982332072794, "tokens_seen": 1509949440 }, { "epoch": 0.46, "learning_rate": 0.0005478253891831167, "loss": 0.0721, "theoretical_loss": 3.5424445173481756, "tokens_seen": 1510211584 }, { "epoch": 0.46, "learning_rate": 0.0005477451452415343, "loss": 0.0711, "theoretical_loss": 3.5423908134225304, "tokens_seen": 1510473728 }, { "epoch": 0.46, "learning_rate": 0.0005476649012999519, "loss": 0.0708, "theoretical_loss": 3.5423371214256245, "tokens_seen": 1510735872 }, { "epoch": 0.46, "learning_rate": 0.0005475846573583695, "loss": 0.0708, "theoretical_loss": 3.5422834413527378, "tokens_seen": 1510998016 }, { "epoch": 0.46, "learning_rate": 0.000547504413416787, "loss": 0.0716, "theoretical_loss": 3.5422297731991548, "tokens_seen": 1511260160 }, { "epoch": 0.46, "learning_rate": 0.0005474241694752047, "loss": 0.0686, "theoretical_loss": 3.542176116960162, "tokens_seen": 1511522304 }, { "epoch": 0.46, "learning_rate": 0.0005473439255336222, "loss": 0.0718, "theoretical_loss": 3.542122472631048, "tokens_seen": 1511784448 }, { "epoch": 0.46, "learning_rate": 0.0005472636815920397, "loss": 0.072, "theoretical_loss": 3.542068840207105, "tokens_seen": 1512046592 }, { "epoch": 0.46, "learning_rate": 0.0005471834376504575, "loss": 0.0696, "theoretical_loss": 3.5420152196836288, "tokens_seen": 1512308736 }, { "epoch": 0.46, "learning_rate": 0.000547103193708875, "loss": 0.0683, "theoretical_loss": 3.541961611055915, "tokens_seen": 1512570880 }, { "epoch": 0.46, "learning_rate": 0.0005470229497672926, "loss": 0.0701, "theoretical_loss": 3.5419080143192643, "tokens_seen": 1512833024 }, { "epoch": 0.46, "learning_rate": 0.0005469427058257102, "loss": 0.0704, "theoretical_loss": 3.541854429468979, "tokens_seen": 1513095168 }, { "epoch": 0.46, "learning_rate": 0.0005468624618841278, "loss": 0.0708, "theoretical_loss": 3.5418008565003647, "tokens_seen": 1513357312 }, { "epoch": 0.46, "learning_rate": 0.0005467822179425453, "loss": 0.0718, "theoretical_loss": 3.541747295408729, "tokens_seen": 1513619456 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0009963659103959799, "objective/train/docs_used": 552897, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3523280620574951, "objective/train/original_loss": 1.3523280620574951, "objective/train/theoretical_loss": 3.541693746189383, "objective/train/tokens_used": 1534341600, "objective/train/value_avg": -0.0100250244140625, "objective/train/value_loss": 0.00032382109202444553, "objective/train/value_max": -6.252527236938477e-05, "objective/train/value_min": -0.81396484375, "objective/train/value_reward_corr": 0.6972783877515103, "objective/train/value_std": 0.01727294921875, "objective/train/weight_avg": 1.0011416673660278, "objective/train/weighted_lm_loss": 1.3530480861663818, "objective/train/weights_max": 1.5500072240829468, "objective/train/weights_min": 0.3707548677921295, "theoretical_loss": 3.541693746189383, "tokens_seen": 1513881600 }, { "epoch": 0.46, "learning_rate": 0.0005467019740009629, "loss": 0.0701, "theoretical_loss": 3.541693746189383, "tokens_seen": 1513881600 }, { "epoch": 0.46, "learning_rate": 0.0005466217300593805, "loss": 0.0687, "theoretical_loss": 3.5416402088376397, "tokens_seen": 1514143744 }, { "epoch": 0.46, "learning_rate": 0.000546541486117798, "loss": 0.0701, "theoretical_loss": 3.5415866833488154, "tokens_seen": 1514405888 }, { "epoch": 0.46, "learning_rate": 0.0005464612421762158, "loss": 0.0709, "theoretical_loss": 3.541533169718228, "tokens_seen": 1514668032 }, { "epoch": 0.46, "learning_rate": 0.0005463809982346333, "loss": 0.0701, "theoretical_loss": 3.541479667941199, "tokens_seen": 1514930176 }, { "epoch": 0.46, "learning_rate": 0.000546300754293051, "loss": 0.0706, "theoretical_loss": 3.5414261780130527, "tokens_seen": 1515192320 }, { "epoch": 0.46, "learning_rate": 0.0005462205103514685, "loss": 0.0698, "theoretical_loss": 3.5413726999291155, "tokens_seen": 1515454464 }, { "epoch": 0.46, "learning_rate": 0.000546140266409886, "loss": 0.0724, "theoretical_loss": 3.5413192336847166, "tokens_seen": 1515716608 }, { "epoch": 0.46, "learning_rate": 0.0005460600224683037, "loss": 0.0686, "theoretical_loss": 3.5412657792751876, "tokens_seen": 1515978752 }, { "epoch": 0.46, "learning_rate": 0.0005459797785267212, "loss": 0.0698, "theoretical_loss": 3.541212336695863, "tokens_seen": 1516240896 }, { "epoch": 0.46, "learning_rate": 0.0005458995345851388, "loss": 0.0702, "theoretical_loss": 3.54115890594208, "tokens_seen": 1516503040 }, { "epoch": 0.46, "learning_rate": 0.0005458192906435564, "loss": 0.0677, "theoretical_loss": 3.5411054870091787, "tokens_seen": 1516765184 }, { "epoch": 0.46, "learning_rate": 0.000545739046701974, "loss": 0.0701, "theoretical_loss": 3.541052079892502, "tokens_seen": 1517027328 }, { "epoch": 0.46, "learning_rate": 0.0005456588027603916, "loss": 0.0709, "theoretical_loss": 3.540998684587394, "tokens_seen": 1517289472 }, { "epoch": 0.46, "learning_rate": 0.0005455785588188093, "loss": 0.071, "theoretical_loss": 3.5409453010892022, "tokens_seen": 1517551616 }, { "epoch": 0.46, "learning_rate": 0.0005454983148772268, "loss": 0.0696, "theoretical_loss": 3.540891929393278, "tokens_seen": 1517813760 }, { "epoch": 0.46, "learning_rate": 0.0005454180709356443, "loss": 0.0704, "theoretical_loss": 3.540838569494974, "tokens_seen": 1518075904 }, { "epoch": 0.46, "learning_rate": 0.000545337826994062, "loss": 0.0676, "theoretical_loss": 3.540785221389646, "tokens_seen": 1518338048 }, { "epoch": 0.46, "learning_rate": 0.0005452575830524795, "loss": 0.072, "theoretical_loss": 3.5407318850726517, "tokens_seen": 1518600192 }, { "epoch": 0.46, "learning_rate": 0.0005451773391108972, "loss": 0.0713, "theoretical_loss": 3.5406785605393525, "tokens_seen": 1518862336 }, { "epoch": 0.46, "learning_rate": 0.0005450970951693147, "loss": 0.0713, "theoretical_loss": 3.540625247785111, "tokens_seen": 1519124480 }, { "epoch": 0.46, "learning_rate": 0.0005450168512277323, "loss": 0.0701, "theoretical_loss": 3.5405719468052945, "tokens_seen": 1519386624 }, { "epoch": 0.46, "learning_rate": 0.00054493660728615, "loss": 0.0682, "theoretical_loss": 3.5405186575952716, "tokens_seen": 1519648768 }, { "epoch": 0.46, "learning_rate": 0.0005448563633445675, "loss": 0.0697, "theoretical_loss": 3.5404653801504127, "tokens_seen": 1519910912 }, { "epoch": 0.46, "learning_rate": 0.0005447761194029851, "loss": 0.0713, "theoretical_loss": 3.540412114466093, "tokens_seen": 1520173056 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.000899738457519561, "objective/train/docs_used": 555119, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4808071851730347, "objective/train/original_loss": 1.4808071851730347, "objective/train/theoretical_loss": 3.5403588605376886, "objective/train/tokens_used": 1540895200, "objective/train/value_avg": -0.00998687744140625, "objective/train/value_loss": 0.0002470078179612756, "objective/train/value_max": -8.153915405273438e-05, "objective/train/value_min": -0.38720703125, "objective/train/value_reward_corr": 0.7553053169241293, "objective/train/value_std": 0.01812744140625, "objective/train/weight_avg": 1.0010114908218384, "objective/train/weighted_lm_loss": 1.4823392629623413, "objective/train/weights_max": 1.1878193616867065, "objective/train/weights_min": 0.3683270514011383, "theoretical_loss": 3.5403588605376886, "tokens_seen": 1520435200 }, { "epoch": 0.46, "learning_rate": 0.0005446958754614027, "loss": 0.0679, "theoretical_loss": 3.5403588605376886, "tokens_seen": 1520435200 }, { "epoch": 0.46, "learning_rate": 0.0005446156315198203, "loss": 0.0681, "theoretical_loss": 3.540305618360578, "tokens_seen": 1520697344 }, { "epoch": 0.46, "learning_rate": 0.0005445353875782378, "loss": 0.0732, "theoretical_loss": 3.540252387930144, "tokens_seen": 1520959488 }, { "epoch": 0.46, "learning_rate": 0.0005444551436366555, "loss": 0.0688, "theoretical_loss": 3.540199169241771, "tokens_seen": 1521221632 }, { "epoch": 0.46, "learning_rate": 0.000544374899695073, "loss": 0.0711, "theoretical_loss": 3.540145962290845, "tokens_seen": 1521483776 }, { "epoch": 0.46, "learning_rate": 0.0005442946557534905, "loss": 0.0694, "theoretical_loss": 3.5400927670727573, "tokens_seen": 1521745920 }, { "epoch": 0.46, "learning_rate": 0.0005442144118119083, "loss": 0.0708, "theoretical_loss": 3.5400395835828986, "tokens_seen": 1522008064 }, { "epoch": 0.46, "learning_rate": 0.0005441341678703258, "loss": 0.0726, "theoretical_loss": 3.539986411816665, "tokens_seen": 1522270208 }, { "epoch": 0.46, "learning_rate": 0.0005440539239287434, "loss": 0.0723, "theoretical_loss": 3.5399332517694533, "tokens_seen": 1522532352 }, { "epoch": 0.46, "learning_rate": 0.000543973679987161, "loss": 0.0685, "theoretical_loss": 3.539880103436664, "tokens_seen": 1522794496 }, { "epoch": 0.46, "learning_rate": 0.0005438934360455786, "loss": 0.0696, "theoretical_loss": 3.5398269668136986, "tokens_seen": 1523056640 }, { "epoch": 0.46, "learning_rate": 0.0005438131921039962, "loss": 0.0677, "theoretical_loss": 3.539773841895964, "tokens_seen": 1523318784 }, { "epoch": 0.46, "learning_rate": 0.0005437329481624137, "loss": 0.0707, "theoretical_loss": 3.5397207286788666, "tokens_seen": 1523580928 }, { "epoch": 0.46, "learning_rate": 0.0005436527042208313, "loss": 0.0736, "theoretical_loss": 3.539667627157818, "tokens_seen": 1523843072 }, { "epoch": 0.46, "learning_rate": 0.0005435724602792489, "loss": 0.0704, "theoretical_loss": 3.53961453732823, "tokens_seen": 1524105216 }, { "epoch": 0.46, "learning_rate": 0.0005434922163376666, "loss": 0.0697, "theoretical_loss": 3.53956145918552, "tokens_seen": 1524367360 }, { "epoch": 0.46, "learning_rate": 0.0005434119723960841, "loss": 0.0698, "theoretical_loss": 3.5395083927251045, "tokens_seen": 1524629504 }, { "epoch": 0.46, "learning_rate": 0.0005433317284545018, "loss": 0.0737, "theoretical_loss": 3.539455337942405, "tokens_seen": 1524891648 }, { "epoch": 0.46, "learning_rate": 0.0005432514845129193, "loss": 0.068, "theoretical_loss": 3.5394022948328447, "tokens_seen": 1525153792 }, { "epoch": 0.46, "learning_rate": 0.0005431712405713368, "loss": 0.069, "theoretical_loss": 3.5393492633918497, "tokens_seen": 1525415936 }, { "epoch": 0.46, "learning_rate": 0.0005430909966297545, "loss": 0.0703, "theoretical_loss": 3.5392962436148485, "tokens_seen": 1525678080 }, { "epoch": 0.46, "learning_rate": 0.000543010752688172, "loss": 0.0682, "theoretical_loss": 3.5392432354972723, "tokens_seen": 1525940224 }, { "epoch": 0.46, "learning_rate": 0.0005429305087465896, "loss": 0.0719, "theoretical_loss": 3.5391902390345544, "tokens_seen": 1526202368 }, { "epoch": 0.46, "learning_rate": 0.0005428502648050072, "loss": 0.0695, "theoretical_loss": 3.5391372542221315, "tokens_seen": 1526464512 }, { "epoch": 0.46, "learning_rate": 0.0005427700208634248, "loss": 0.0692, "theoretical_loss": 3.539084281055443, "tokens_seen": 1526726656 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.001032950938679278, "objective/train/docs_used": 557485, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3883706331253052, "objective/train/original_loss": 1.3883706331253052, "objective/train/theoretical_loss": 3.5390313195299283, "objective/train/tokens_used": 1547448800, "objective/train/value_avg": -0.00554656982421875, "objective/train/value_loss": 0.00018550669483374804, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.630859375, "objective/train/value_reward_corr": 0.6172755507616114, "objective/train/value_std": 0.00994110107421875, "objective/train/weight_avg": 1.0011130571365356, "objective/train/weighted_lm_loss": 1.39009690284729, "objective/train/weights_max": 1.3466330766677856, "objective/train/weights_min": 0.3682669997215271, "theoretical_loss": 3.5390313195299283, "tokens_seen": 1526988800 }, { "epoch": 0.46, "learning_rate": 0.0005426897769218424, "loss": 0.0712, "theoretical_loss": 3.5390313195299283, "tokens_seen": 1526988800 }, { "epoch": 0.46, "learning_rate": 0.0005426095329802601, "loss": 0.067, "theoretical_loss": 3.538978369641033, "tokens_seen": 1527250944 }, { "epoch": 0.46, "learning_rate": 0.0005425292890386776, "loss": 0.0697, "theoretical_loss": 3.538925431384203, "tokens_seen": 1527513088 }, { "epoch": 0.46, "learning_rate": 0.0005424490450970952, "loss": 0.0702, "theoretical_loss": 3.538872504754888, "tokens_seen": 1527775232 }, { "epoch": 0.46, "learning_rate": 0.0005423688011555128, "loss": 0.0697, "theoretical_loss": 3.538819589748539, "tokens_seen": 1528037376 }, { "epoch": 0.46, "learning_rate": 0.0005422885572139303, "loss": 0.0688, "theoretical_loss": 3.5387666863606104, "tokens_seen": 1528299520 }, { "epoch": 0.46, "learning_rate": 0.000542208313272348, "loss": 0.0718, "theoretical_loss": 3.5387137945865588, "tokens_seen": 1528561664 }, { "epoch": 0.46, "learning_rate": 0.0005421280693307655, "loss": 0.0701, "theoretical_loss": 3.538660914421844, "tokens_seen": 1528823808 }, { "epoch": 0.46, "learning_rate": 0.0005420478253891831, "loss": 0.0718, "theoretical_loss": 3.5386080458619276, "tokens_seen": 1529085952 }, { "epoch": 0.46, "learning_rate": 0.0005419675814476008, "loss": 0.0709, "theoretical_loss": 3.538555188902274, "tokens_seen": 1529348096 }, { "epoch": 0.46, "learning_rate": 0.0005418873375060183, "loss": 0.0701, "theoretical_loss": 3.53850234353835, "tokens_seen": 1529610240 }, { "epoch": 0.46, "learning_rate": 0.0005418070935644359, "loss": 0.0691, "theoretical_loss": 3.5384495097656252, "tokens_seen": 1529872384 }, { "epoch": 0.46, "learning_rate": 0.0005417268496228535, "loss": 0.0723, "theoretical_loss": 3.5383966875795716, "tokens_seen": 1530134528 }, { "epoch": 0.46, "learning_rate": 0.0005416466056812711, "loss": 0.0719, "theoretical_loss": 3.538343876975664, "tokens_seen": 1530396672 }, { "epoch": 0.46, "learning_rate": 0.0005415663617396886, "loss": 0.0714, "theoretical_loss": 3.5382910779493795, "tokens_seen": 1530658816 }, { "epoch": 0.46, "learning_rate": 0.0005414861177981063, "loss": 0.07, "theoretical_loss": 3.538238290496198, "tokens_seen": 1530920960 }, { "epoch": 0.46, "learning_rate": 0.0005414058738565238, "loss": 0.0688, "theoretical_loss": 3.5381855146116017, "tokens_seen": 1531183104 }, { "epoch": 0.46, "learning_rate": 0.0005413256299149413, "loss": 0.0717, "theoretical_loss": 3.5381327502910747, "tokens_seen": 1531445248 }, { "epoch": 0.46, "learning_rate": 0.000541245385973359, "loss": 0.0708, "theoretical_loss": 3.538079997530105, "tokens_seen": 1531707392 }, { "epoch": 0.46, "learning_rate": 0.0005411651420317766, "loss": 0.072, "theoretical_loss": 3.538027256324182, "tokens_seen": 1531969536 }, { "epoch": 0.46, "learning_rate": 0.0005410848980901943, "loss": 0.0696, "theoretical_loss": 3.5379745266687985, "tokens_seen": 1532231680 }, { "epoch": 0.46, "learning_rate": 0.0005410046541486118, "loss": 0.0676, "theoretical_loss": 3.537921808559449, "tokens_seen": 1532493824 }, { "epoch": 0.46, "learning_rate": 0.0005409244102070294, "loss": 0.07, "theoretical_loss": 3.537869101991631, "tokens_seen": 1532755968 }, { "epoch": 0.46, "learning_rate": 0.000540844166265447, "loss": 0.0731, "theoretical_loss": 3.537816406960845, "tokens_seen": 1533018112 }, { "epoch": 0.46, "learning_rate": 0.0005407639223238645, "loss": 0.0715, "theoretical_loss": 3.537763723462593, "tokens_seen": 1533280256 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0005670369719155133, "objective/train/docs_used": 559697, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5663200616836548, "objective/train/original_loss": 1.5663201808929443, "objective/train/theoretical_loss": 3.537711051492379, "objective/train/tokens_used": 1554002400, "objective/train/value_avg": -0.00865936279296875, "objective/train/value_loss": 0.000221301379497163, "objective/train/value_max": -4.267692565917969e-05, "objective/train/value_min": -0.316650390625, "objective/train/value_reward_corr": 0.6577696228437152, "objective/train/value_std": 0.0131683349609375, "objective/train/weight_avg": 1.0006672143936157, "objective/train/weighted_lm_loss": 1.5662798881530762, "objective/train/weights_max": 1.3565322160720825, "objective/train/weights_min": 0.37095150351524353, "theoretical_loss": 3.537711051492379, "tokens_seen": 1533542400 }, { "epoch": 0.46, "learning_rate": 0.0005406836783822821, "loss": 0.0736, "theoretical_loss": 3.537711051492379, "tokens_seen": 1533542400 }, { "epoch": 0.46, "learning_rate": 0.0005406034344406997, "loss": 0.0708, "theoretical_loss": 3.5376583910457127, "tokens_seen": 1533804544 }, { "epoch": 0.46, "learning_rate": 0.0005405231904991174, "loss": 0.0711, "theoretical_loss": 3.537605742118102, "tokens_seen": 1534066688 }, { "epoch": 0.46, "learning_rate": 0.0005404429465575349, "loss": 0.0721, "theoretical_loss": 3.537553104705061, "tokens_seen": 1534328832 }, { "epoch": 0.47, "learning_rate": 0.0005403627026159526, "loss": 0.0747, "theoretical_loss": 3.5375004788021043, "tokens_seen": 1534590976 }, { "epoch": 0.47, "learning_rate": 0.0005402824586743701, "loss": 0.0725, "theoretical_loss": 3.537447864404749, "tokens_seen": 1534853120 }, { "epoch": 0.47, "learning_rate": 0.0005402022147327876, "loss": 0.0714, "theoretical_loss": 3.5373952615085154, "tokens_seen": 1535115264 }, { "epoch": 0.47, "learning_rate": 0.0005401219707912053, "loss": 0.0695, "theoretical_loss": 3.5373426701089263, "tokens_seen": 1535377408 }, { "epoch": 0.47, "learning_rate": 0.0005400417268496228, "loss": 0.0728, "theoretical_loss": 3.5372900902015063, "tokens_seen": 1535639552 }, { "epoch": 0.47, "learning_rate": 0.0005399614829080405, "loss": 0.0734, "theoretical_loss": 3.537237521781784, "tokens_seen": 1535901696 }, { "epoch": 0.47, "learning_rate": 0.000539881238966458, "loss": 0.072, "theoretical_loss": 3.537184964845289, "tokens_seen": 1536163840 }, { "epoch": 0.47, "learning_rate": 0.0005398009950248756, "loss": 0.0756, "theoretical_loss": 3.5371324193875533, "tokens_seen": 1536425984 }, { "epoch": 0.47, "learning_rate": 0.0005397207510832933, "loss": 0.0702, "theoretical_loss": 3.537079885404113, "tokens_seen": 1536688128 }, { "epoch": 0.47, "learning_rate": 0.0005396405071417108, "loss": 0.0732, "theoretical_loss": 3.5370273628905045, "tokens_seen": 1536950272 }, { "epoch": 0.47, "learning_rate": 0.0005395602632001284, "loss": 0.0706, "theoretical_loss": 3.5369748518422695, "tokens_seen": 1537212416 }, { "epoch": 0.47, "learning_rate": 0.000539480019258546, "loss": 0.0733, "theoretical_loss": 3.5369223522549493, "tokens_seen": 1537474560 }, { "epoch": 0.47, "learning_rate": 0.0005393997753169636, "loss": 0.0715, "theoretical_loss": 3.53686986412409, "tokens_seen": 1537736704 }, { "epoch": 0.47, "learning_rate": 0.0005393195313753811, "loss": 0.0711, "theoretical_loss": 3.5368173874452378, "tokens_seen": 1537998848 }, { "epoch": 0.47, "learning_rate": 0.0005392392874337988, "loss": 0.0716, "theoretical_loss": 3.536764922213944, "tokens_seen": 1538260992 }, { "epoch": 0.47, "learning_rate": 0.0005391590434922163, "loss": 0.071, "theoretical_loss": 3.536712468425761, "tokens_seen": 1538523136 }, { "epoch": 0.47, "learning_rate": 0.0005390787995506339, "loss": 0.0705, "theoretical_loss": 3.5366600260762433, "tokens_seen": 1538785280 }, { "epoch": 0.47, "learning_rate": 0.0005389985556090516, "loss": 0.069, "theoretical_loss": 3.5366075951609486, "tokens_seen": 1539047424 }, { "epoch": 0.47, "learning_rate": 0.0005389183116674691, "loss": 0.0712, "theoretical_loss": 3.5365551756754376, "tokens_seen": 1539309568 }, { "epoch": 0.47, "learning_rate": 0.0005388380677258867, "loss": 0.0711, "theoretical_loss": 3.5365027676152714, "tokens_seen": 1539571712 }, { "epoch": 0.47, "learning_rate": 0.0005387578237843043, "loss": 0.0726, "theoretical_loss": 3.5364503709760164, "tokens_seen": 1539833856 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0006242617382667959, "objective/train/docs_used": 562258, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4021567106246948, "objective/train/original_loss": 1.4021568298339844, "objective/train/theoretical_loss": 3.536397985753239, "objective/train/tokens_used": 1560556000, "objective/train/value_avg": -0.007061004638671875, "objective/train/value_loss": 0.00021928877686150372, "objective/train/value_max": -5.561113357543945e-05, "objective/train/value_min": -0.7822265625, "objective/train/value_reward_corr": 0.7640836101638206, "objective/train/value_std": 0.0187225341796875, "objective/train/weight_avg": 1.0007275342941284, "objective/train/weighted_lm_loss": 1.4030952453613281, "objective/train/weights_max": 1.5728368759155273, "objective/train/weights_min": 0.5366626381874084, "theoretical_loss": 3.536397985753239, "tokens_seen": 1540096000 }, { "epoch": 0.47, "learning_rate": 0.0005386775798427219, "loss": 0.0733, "theoretical_loss": 3.536397985753239, "tokens_seen": 1540096000 }, { "epoch": 0.47, "learning_rate": 0.0005385973359011395, "loss": 0.0762, "theoretical_loss": 3.53634561194251, "tokens_seen": 1540358144 }, { "epoch": 0.47, "learning_rate": 0.0005385170919595571, "loss": 0.0726, "theoretical_loss": 3.5362932495394013, "tokens_seen": 1540620288 }, { "epoch": 0.47, "learning_rate": 0.0005384368480179746, "loss": 0.0696, "theoretical_loss": 3.5362408985394875, "tokens_seen": 1540882432 }, { "epoch": 0.47, "learning_rate": 0.0005383566040763922, "loss": 0.0713, "theoretical_loss": 3.5361885589383464, "tokens_seen": 1541144576 }, { "epoch": 0.47, "learning_rate": 0.0005382763601348099, "loss": 0.0721, "theoretical_loss": 3.5361362307315574, "tokens_seen": 1541406720 }, { "epoch": 0.47, "learning_rate": 0.0005381961161932274, "loss": 0.0726, "theoretical_loss": 3.5360839139147036, "tokens_seen": 1541668864 }, { "epoch": 0.47, "learning_rate": 0.0005381158722516451, "loss": 0.0716, "theoretical_loss": 3.5360316084833685, "tokens_seen": 1541931008 }, { "epoch": 0.47, "learning_rate": 0.0005380356283100626, "loss": 0.0704, "theoretical_loss": 3.53597931443314, "tokens_seen": 1542193152 }, { "epoch": 0.47, "learning_rate": 0.0005379553843684802, "loss": 0.0723, "theoretical_loss": 3.535927031759608, "tokens_seen": 1542455296 }, { "epoch": 0.47, "learning_rate": 0.0005378751404268978, "loss": 0.072, "theoretical_loss": 3.5358747604583636, "tokens_seen": 1542717440 }, { "epoch": 0.47, "learning_rate": 0.0005377948964853153, "loss": 0.0706, "theoretical_loss": 3.5358225005250024, "tokens_seen": 1542979584 }, { "epoch": 0.47, "learning_rate": 0.0005377146525437329, "loss": 0.0735, "theoretical_loss": 3.535770251955121, "tokens_seen": 1543241728 }, { "epoch": 0.47, "learning_rate": 0.0005376344086021505, "loss": 0.07, "theoretical_loss": 3.5357180147443197, "tokens_seen": 1543503872 }, { "epoch": 0.47, "learning_rate": 0.0005375541646605681, "loss": 0.073, "theoretical_loss": 3.5356657888881986, "tokens_seen": 1543766016 }, { "epoch": 0.47, "learning_rate": 0.0005374739207189858, "loss": 0.0704, "theoretical_loss": 3.5356135743823636, "tokens_seen": 1544028160 }, { "epoch": 0.47, "learning_rate": 0.0005373936767774034, "loss": 0.0723, "theoretical_loss": 3.5355613712224203, "tokens_seen": 1544290304 }, { "epoch": 0.47, "learning_rate": 0.0005373134328358209, "loss": 0.0744, "theoretical_loss": 3.53550917940398, "tokens_seen": 1544552448 }, { "epoch": 0.47, "learning_rate": 0.0005372331888942385, "loss": 0.0691, "theoretical_loss": 3.535456998922652, "tokens_seen": 1544814592 }, { "epoch": 0.47, "learning_rate": 0.0005371529449526561, "loss": 0.0711, "theoretical_loss": 3.535404829774052, "tokens_seen": 1545076736 }, { "epoch": 0.47, "learning_rate": 0.0005370727010110736, "loss": 0.0699, "theoretical_loss": 3.535352671953796, "tokens_seen": 1545338880 }, { "epoch": 0.47, "learning_rate": 0.0005369924570694913, "loss": 0.0724, "theoretical_loss": 3.5353005254575027, "tokens_seen": 1545601024 }, { "epoch": 0.47, "learning_rate": 0.0005369122131279088, "loss": 0.0769, "theoretical_loss": 3.5352483902807945, "tokens_seen": 1545863168 }, { "epoch": 0.47, "learning_rate": 0.0005368319691863264, "loss": 0.0718, "theoretical_loss": 3.535196266419295, "tokens_seen": 1546125312 }, { "epoch": 0.47, "learning_rate": 0.0005367517252447441, "loss": 0.0721, "theoretical_loss": 3.5351441538686306, "tokens_seen": 1546387456 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0003114771971013397, "objective/train/docs_used": 564726, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4200135469436646, "objective/train/original_loss": 1.420013427734375, "objective/train/theoretical_loss": 3.535092052624429, "objective/train/tokens_used": 1567109600, "objective/train/value_avg": -0.00567626953125, "objective/train/value_loss": 0.0001583931443747133, "objective/train/value_max": -3.0934810638427734e-05, "objective/train/value_min": -0.67724609375, "objective/train/value_reward_corr": 0.6668066682699528, "objective/train/value_std": 0.0106658935546875, "objective/train/weight_avg": 1.0003831386566162, "objective/train/weighted_lm_loss": 1.4202409982681274, "objective/train/weights_max": 1.356201171875, "objective/train/weights_min": 0.3696674406528473, "theoretical_loss": 3.535092052624429, "tokens_seen": 1546649600 }, { "epoch": 0.47, "learning_rate": 0.0005366714813031616, "loss": 0.0688, "theoretical_loss": 3.535092052624429, "tokens_seen": 1546649600 }, { "epoch": 0.47, "learning_rate": 0.0005365912373615792, "loss": 0.0735, "theoretical_loss": 3.5350399626823226, "tokens_seen": 1546911744 }, { "epoch": 0.47, "learning_rate": 0.0005365109934199968, "loss": 0.0729, "theoretical_loss": 3.534987884037945, "tokens_seen": 1547173888 }, { "epoch": 0.47, "learning_rate": 0.0005364307494784144, "loss": 0.072, "theoretical_loss": 3.5349358166869314, "tokens_seen": 1547436032 }, { "epoch": 0.47, "learning_rate": 0.0005363505055368319, "loss": 0.0719, "theoretical_loss": 3.534883760624921, "tokens_seen": 1547698176 }, { "epoch": 0.47, "learning_rate": 0.0005362702615952496, "loss": 0.0765, "theoretical_loss": 3.534831715847555, "tokens_seen": 1547960320 }, { "epoch": 0.47, "learning_rate": 0.0005361900176536671, "loss": 0.0733, "theoretical_loss": 3.534779682350475, "tokens_seen": 1548222464 }, { "epoch": 0.47, "learning_rate": 0.0005361097737120849, "loss": 0.0748, "theoretical_loss": 3.534727660129329, "tokens_seen": 1548484608 }, { "epoch": 0.47, "learning_rate": 0.0005360295297705024, "loss": 0.071, "theoretical_loss": 3.534675649179764, "tokens_seen": 1548746752 }, { "epoch": 0.47, "learning_rate": 0.0005359492858289199, "loss": 0.0735, "theoretical_loss": 3.53462364949743, "tokens_seen": 1549008896 }, { "epoch": 0.47, "learning_rate": 0.0005358690418873376, "loss": 0.0706, "theoretical_loss": 3.5345716610779814, "tokens_seen": 1549271040 }, { "epoch": 0.47, "learning_rate": 0.0005357887979457551, "loss": 0.0721, "theoretical_loss": 3.5345196839170723, "tokens_seen": 1549533184 }, { "epoch": 0.47, "learning_rate": 0.0005357085540041727, "loss": 0.0737, "theoretical_loss": 3.534467718010361, "tokens_seen": 1549795328 }, { "epoch": 0.47, "learning_rate": 0.0005356283100625903, "loss": 0.0716, "theoretical_loss": 3.5344157633535085, "tokens_seen": 1550057472 }, { "epoch": 0.47, "learning_rate": 0.0005355480661210079, "loss": 0.0723, "theoretical_loss": 3.5343638199421763, "tokens_seen": 1550319616 }, { "epoch": 0.47, "learning_rate": 0.0005354678221794254, "loss": 0.0732, "theoretical_loss": 3.5343118877720294, "tokens_seen": 1550581760 }, { "epoch": 0.47, "learning_rate": 0.000535387578237843, "loss": 0.0717, "theoretical_loss": 3.534259966838736, "tokens_seen": 1550843904 }, { "epoch": 0.47, "learning_rate": 0.0005353073342962607, "loss": 0.0696, "theoretical_loss": 3.534208057137966, "tokens_seen": 1551106048 }, { "epoch": 0.47, "learning_rate": 0.0005352270903546782, "loss": 0.0727, "theoretical_loss": 3.5341561586653905, "tokens_seen": 1551368192 }, { "epoch": 0.47, "learning_rate": 0.0005351468464130959, "loss": 0.0693, "theoretical_loss": 3.5341042714166853, "tokens_seen": 1551630336 }, { "epoch": 0.47, "learning_rate": 0.0005350666024715134, "loss": 0.0702, "theoretical_loss": 3.5340523953875267, "tokens_seen": 1551892480 }, { "epoch": 0.47, "learning_rate": 0.0005349863585299311, "loss": 0.0706, "theoretical_loss": 3.5340005305735946, "tokens_seen": 1552154624 }, { "epoch": 0.47, "learning_rate": 0.0005349061145883486, "loss": 0.0686, "theoretical_loss": 3.533948676970571, "tokens_seen": 1552416768 }, { "epoch": 0.47, "learning_rate": 0.0005348258706467661, "loss": 0.072, "theoretical_loss": 3.533896834574139, "tokens_seen": 1552678912 }, { "epoch": 0.47, "learning_rate": 0.0005347456267051838, "loss": 0.0722, "theoretical_loss": 3.5338450033799864, "tokens_seen": 1552941056 }, { "epoch": 0.47, "objective/train/advantage_avg": -0.0007033547153696418, "objective/train/docs_used": 567156, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3911396265029907, "objective/train/original_loss": 1.3911396265029907, "objective/train/theoretical_loss": 3.533793183383802, "objective/train/tokens_used": 1573663200, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.00024034272064454854, "objective/train/value_max": -4.988908767700195e-05, "objective/train/value_min": -0.38916015625, "objective/train/value_reward_corr": 0.6741884057137439, "objective/train/value_std": 0.013458251953125, "objective/train/weight_avg": 0.9994078874588013, "objective/train/weighted_lm_loss": 1.3903698921203613, "objective/train/weights_max": 1.2013026475906372, "objective/train/weights_min": 0.37362140417099, "theoretical_loss": 3.533793183383802, "tokens_seen": 1553203200 }, { "epoch": 0.47, "learning_rate": 0.0005346653827636013, "loss": 0.0693, "theoretical_loss": 3.533793183383802, "tokens_seen": 1553203200 }, { "epoch": 0.47, "learning_rate": 0.000534585138822019, "loss": 0.0685, "theoretical_loss": 3.5337413745812767, "tokens_seen": 1553465344 }, { "epoch": 0.47, "learning_rate": 0.0005345048948804366, "loss": 0.0718, "theoretical_loss": 3.5336895769681043, "tokens_seen": 1553727488 }, { "epoch": 0.47, "learning_rate": 0.0005344246509388542, "loss": 0.0697, "theoretical_loss": 3.533637790539981, "tokens_seen": 1553989632 }, { "epoch": 0.47, "learning_rate": 0.0005343444069972717, "loss": 0.0671, "theoretical_loss": 3.533586015292606, "tokens_seen": 1554251776 }, { "epoch": 0.47, "learning_rate": 0.0005342641630556893, "loss": 0.0708, "theoretical_loss": 3.5335342512216794, "tokens_seen": 1554513920 }, { "epoch": 0.47, "learning_rate": 0.0005341839191141069, "loss": 0.073, "theoretical_loss": 3.5334824983229045, "tokens_seen": 1554776064 }, { "epoch": 0.47, "learning_rate": 0.0005341036751725244, "loss": 0.072, "theoretical_loss": 3.5334307565919874, "tokens_seen": 1555038208 }, { "epoch": 0.47, "learning_rate": 0.0005340234312309421, "loss": 0.0695, "theoretical_loss": 3.533379026024636, "tokens_seen": 1555300352 }, { "epoch": 0.47, "learning_rate": 0.0005339431872893596, "loss": 0.0719, "theoretical_loss": 3.53332730661656, "tokens_seen": 1555562496 }, { "epoch": 0.47, "learning_rate": 0.0005338629433477772, "loss": 0.0716, "theoretical_loss": 3.533275598363473, "tokens_seen": 1555824640 }, { "epoch": 0.47, "learning_rate": 0.0005337826994061949, "loss": 0.0676, "theoretical_loss": 3.53322390126109, "tokens_seen": 1556086784 }, { "epoch": 0.47, "learning_rate": 0.0005337024554646124, "loss": 0.0731, "theoretical_loss": 3.533172215305129, "tokens_seen": 1556348928 }, { "epoch": 0.47, "learning_rate": 0.0005336222115230301, "loss": 0.0681, "theoretical_loss": 3.533120540491309, "tokens_seen": 1556611072 }, { "epoch": 0.47, "learning_rate": 0.0005335419675814476, "loss": 0.0728, "theoretical_loss": 3.533068876815352, "tokens_seen": 1556873216 }, { "epoch": 0.47, "learning_rate": 0.0005334617236398652, "loss": 0.0687, "theoretical_loss": 3.5330172242729834, "tokens_seen": 1557135360 }, { "epoch": 0.47, "learning_rate": 0.0005333814796982828, "loss": 0.072, "theoretical_loss": 3.53296558285993, "tokens_seen": 1557397504 }, { "epoch": 0.47, "learning_rate": 0.0005333012357567004, "loss": 0.067, "theoretical_loss": 3.532913952571921, "tokens_seen": 1557659648 }, { "epoch": 0.47, "learning_rate": 0.0005332209918151179, "loss": 0.0699, "theoretical_loss": 3.5328623334046885, "tokens_seen": 1557921792 }, { "epoch": 0.47, "learning_rate": 0.0005331407478735357, "loss": 0.0697, "theoretical_loss": 3.532810725353966, "tokens_seen": 1558183936 }, { "epoch": 0.47, "learning_rate": 0.0005330605039319532, "loss": 0.0721, "theoretical_loss": 3.5327591284154893, "tokens_seen": 1558446080 }, { "epoch": 0.47, "learning_rate": 0.0005329802599903707, "loss": 0.0692, "theoretical_loss": 3.5327075425849985, "tokens_seen": 1558708224 }, { "epoch": 0.47, "learning_rate": 0.0005329000160487884, "loss": 0.0704, "theoretical_loss": 3.5326559678582337, "tokens_seen": 1558970368 }, { "epoch": 0.47, "learning_rate": 0.0005328197721072059, "loss": 0.0701, "theoretical_loss": 3.532604404230939, "tokens_seen": 1559232512 }, { "epoch": 0.47, "learning_rate": 0.0005327395281656235, "loss": 0.0688, "theoretical_loss": 3.53255285169886, "tokens_seen": 1559494656 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0007141765672713518, "objective/train/docs_used": 569616, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4324969053268433, "objective/train/original_loss": 1.4324970245361328, "objective/train/theoretical_loss": 3.5325013102577443, "objective/train/tokens_used": 1580216800, "objective/train/value_avg": -0.00835418701171875, "objective/train/value_loss": 0.0002993447124026716, "objective/train/value_max": -0.00010073184967041016, "objective/train/value_min": -0.72412109375, "objective/train/value_reward_corr": 0.7243982875759303, "objective/train/value_std": 0.018646240234375, "objective/train/weight_avg": 1.0008517503738403, "objective/train/weighted_lm_loss": 1.433107614517212, "objective/train/weights_max": 1.4205234050750732, "objective/train/weights_min": 0.36909955739974976, "theoretical_loss": 3.5325013102577443, "tokens_seen": 1559756800 }, { "epoch": 0.47, "learning_rate": 0.0005326592842240411, "loss": 0.0721, "theoretical_loss": 3.5325013102577443, "tokens_seen": 1559756800 }, { "epoch": 0.47, "learning_rate": 0.0005325790402824587, "loss": 0.0729, "theoretical_loss": 3.532449779903343, "tokens_seen": 1560018944 }, { "epoch": 0.47, "learning_rate": 0.0005324987963408762, "loss": 0.0693, "theoretical_loss": 3.5323982606314086, "tokens_seen": 1560281088 }, { "epoch": 0.47, "learning_rate": 0.0005324185523992938, "loss": 0.0722, "theoretical_loss": 3.5323467524376966, "tokens_seen": 1560543232 }, { "epoch": 0.47, "learning_rate": 0.0005323383084577115, "loss": 0.0715, "theoretical_loss": 3.532295255317964, "tokens_seen": 1560805376 }, { "epoch": 0.47, "learning_rate": 0.0005322580645161291, "loss": 0.0673, "theoretical_loss": 3.532243769267971, "tokens_seen": 1561067520 }, { "epoch": 0.47, "learning_rate": 0.0005321778205745467, "loss": 0.0701, "theoretical_loss": 3.5321922942834796, "tokens_seen": 1561329664 }, { "epoch": 0.47, "learning_rate": 0.0005320975766329642, "loss": 0.0738, "theoretical_loss": 3.5321408303602544, "tokens_seen": 1561591808 }, { "epoch": 0.47, "learning_rate": 0.0005320173326913819, "loss": 0.0701, "theoretical_loss": 3.532089377494062, "tokens_seen": 1561853952 }, { "epoch": 0.47, "learning_rate": 0.0005319370887497994, "loss": 0.0753, "theoretical_loss": 3.532037935680672, "tokens_seen": 1562116096 }, { "epoch": 0.47, "learning_rate": 0.0005318568448082169, "loss": 0.0714, "theoretical_loss": 3.5319865049158556, "tokens_seen": 1562378240 }, { "epoch": 0.47, "learning_rate": 0.0005317766008666346, "loss": 0.0686, "theoretical_loss": 3.5319350851953866, "tokens_seen": 1562640384 }, { "epoch": 0.47, "learning_rate": 0.0005316963569250521, "loss": 0.0703, "theoretical_loss": 3.531883676515041, "tokens_seen": 1562902528 }, { "epoch": 0.47, "learning_rate": 0.0005316161129834697, "loss": 0.073, "theoretical_loss": 3.5318322788705974, "tokens_seen": 1563164672 }, { "epoch": 0.47, "learning_rate": 0.0005315358690418874, "loss": 0.068, "theoretical_loss": 3.531780892257837, "tokens_seen": 1563426816 }, { "epoch": 0.47, "learning_rate": 0.000531455625100305, "loss": 0.0691, "theoretical_loss": 3.5317295166725424, "tokens_seen": 1563688960 }, { "epoch": 0.47, "learning_rate": 0.0005313753811587225, "loss": 0.0688, "theoretical_loss": 3.5316781521104996, "tokens_seen": 1563951104 }, { "epoch": 0.47, "learning_rate": 0.0005312951372171401, "loss": 0.0687, "theoretical_loss": 3.5316267985674954, "tokens_seen": 1564213248 }, { "epoch": 0.47, "learning_rate": 0.0005312148932755577, "loss": 0.0695, "theoretical_loss": 3.5315754560393207, "tokens_seen": 1564475392 }, { "epoch": 0.47, "learning_rate": 0.0005311346493339753, "loss": 0.0688, "theoretical_loss": 3.5315241245217672, "tokens_seen": 1564737536 }, { "epoch": 0.47, "learning_rate": 0.0005310544053923929, "loss": 0.0697, "theoretical_loss": 3.5314728040106305, "tokens_seen": 1564999680 }, { "epoch": 0.47, "learning_rate": 0.0005309741614508104, "loss": 0.073, "theoretical_loss": 3.531421494501707, "tokens_seen": 1565261824 }, { "epoch": 0.47, "learning_rate": 0.0005308939175092282, "loss": 0.0713, "theoretical_loss": 3.5313701959907955, "tokens_seen": 1565523968 }, { "epoch": 0.47, "learning_rate": 0.0005308136735676457, "loss": 0.0715, "theoretical_loss": 3.5313189084736987, "tokens_seen": 1565786112 }, { "epoch": 0.47, "learning_rate": 0.0005307334296260632, "loss": 0.0715, "theoretical_loss": 3.5312676319462195, "tokens_seen": 1566048256 }, { "epoch": 0.47, "objective/train/advantage_avg": -0.00031399555155076087, "objective/train/docs_used": 572070, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3572434186935425, "objective/train/original_loss": 1.3572434186935425, "objective/train/theoretical_loss": 3.531216366404165, "objective/train/tokens_used": 1586770400, "objective/train/value_avg": -0.009185791015625, "objective/train/value_loss": 0.00043531539267860353, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.97998046875, "objective/train/value_reward_corr": 0.7681454448311413, "objective/train/value_std": 0.0237579345703125, "objective/train/weight_avg": 0.999882698059082, "objective/train/weighted_lm_loss": 1.3566851615905762, "objective/train/weights_max": 2.3744029998779297, "objective/train/weights_min": 0.3780485987663269, "theoretical_loss": 3.531216366404165, "tokens_seen": 1566310400 }, { "epoch": 0.47, "learning_rate": 0.0005306531856844809, "loss": 0.0684, "theoretical_loss": 3.531216366404165, "tokens_seen": 1566310400 }, { "epoch": 0.47, "learning_rate": 0.0005305729417428984, "loss": 0.0682, "theoretical_loss": 3.531165111843343, "tokens_seen": 1566572544 }, { "epoch": 0.47, "learning_rate": 0.000530492697801316, "loss": 0.0709, "theoretical_loss": 3.531113868259565, "tokens_seen": 1566834688 }, { "epoch": 0.47, "learning_rate": 0.0005304124538597336, "loss": 0.0684, "theoretical_loss": 3.5310626356486434, "tokens_seen": 1567096832 }, { "epoch": 0.47, "learning_rate": 0.0005303322099181512, "loss": 0.0689, "theoretical_loss": 3.5310114140063944, "tokens_seen": 1567358976 }, { "epoch": 0.48, "learning_rate": 0.0005302519659765687, "loss": 0.0703, "theoretical_loss": 3.530960203328635, "tokens_seen": 1567621120 }, { "epoch": 0.48, "learning_rate": 0.0005301717220349863, "loss": 0.0704, "theoretical_loss": 3.5309090036111854, "tokens_seen": 1567883264 }, { "epoch": 0.48, "learning_rate": 0.000530091478093404, "loss": 0.0704, "theoretical_loss": 3.5308578148498686, "tokens_seen": 1568145408 }, { "epoch": 0.48, "learning_rate": 0.0005300112341518215, "loss": 0.0705, "theoretical_loss": 3.5308066370405076, "tokens_seen": 1568407552 }, { "epoch": 0.48, "learning_rate": 0.0005299309902102392, "loss": 0.0695, "theoretical_loss": 3.530755470178931, "tokens_seen": 1568669696 }, { "epoch": 0.48, "learning_rate": 0.0005298507462686567, "loss": 0.0697, "theoretical_loss": 3.5307043142609666, "tokens_seen": 1568931840 }, { "epoch": 0.48, "learning_rate": 0.0005297705023270744, "loss": 0.0696, "theoretical_loss": 3.530653169282447, "tokens_seen": 1569193984 }, { "epoch": 0.48, "learning_rate": 0.0005296902583854919, "loss": 0.0689, "theoretical_loss": 3.5306020352392053, "tokens_seen": 1569456128 }, { "epoch": 0.48, "learning_rate": 0.0005296100144439095, "loss": 0.0699, "theoretical_loss": 3.5305509121270777, "tokens_seen": 1569718272 }, { "epoch": 0.48, "learning_rate": 0.0005295297705023271, "loss": 0.0664, "theoretical_loss": 3.5304997999419028, "tokens_seen": 1569980416 }, { "epoch": 0.48, "learning_rate": 0.0005294495265607446, "loss": 0.0693, "theoretical_loss": 3.5304486986795203, "tokens_seen": 1570242560 }, { "epoch": 0.48, "learning_rate": 0.0005293692826191623, "loss": 0.0701, "theoretical_loss": 3.5303976083357735, "tokens_seen": 1570504704 }, { "epoch": 0.48, "learning_rate": 0.0005292890386775799, "loss": 0.0648, "theoretical_loss": 3.5303465289065077, "tokens_seen": 1570766848 }, { "epoch": 0.48, "learning_rate": 0.0005292087947359975, "loss": 0.0674, "theoretical_loss": 3.5302954603875696, "tokens_seen": 1571028992 }, { "epoch": 0.48, "learning_rate": 0.000529128550794415, "loss": 0.0695, "theoretical_loss": 3.5302444027748106, "tokens_seen": 1571291136 }, { "epoch": 0.48, "learning_rate": 0.0005290483068528327, "loss": 0.0702, "theoretical_loss": 3.530193356064081, "tokens_seen": 1571553280 }, { "epoch": 0.48, "learning_rate": 0.0005289680629112502, "loss": 0.0689, "theoretical_loss": 3.530142320251235, "tokens_seen": 1571815424 }, { "epoch": 0.48, "learning_rate": 0.0005288878189696677, "loss": 0.0688, "theoretical_loss": 3.5300912953321304, "tokens_seen": 1572077568 }, { "epoch": 0.48, "learning_rate": 0.0005288075750280854, "loss": 0.0685, "theoretical_loss": 3.530040281302625, "tokens_seen": 1572339712 }, { "epoch": 0.48, "learning_rate": 0.0005287273310865029, "loss": 0.0736, "theoretical_loss": 3.5299892781585793, "tokens_seen": 1572601856 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0002667908265721053, "objective/train/docs_used": 574423, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4923175573349, "objective/train/original_loss": 1.492317795753479, "objective/train/theoretical_loss": 3.529938285895858, "objective/train/tokens_used": 1593324000, "objective/train/value_avg": -0.0081787109375, "objective/train/value_loss": 0.0002718984615057707, "objective/train/value_max": -8.481740951538086e-05, "objective/train/value_min": -0.552734375, "objective/train/value_reward_corr": 0.7223534480290491, "objective/train/value_std": 0.0150909423828125, "objective/train/weight_avg": 1.0003913640975952, "objective/train/weighted_lm_loss": 1.4929695129394531, "objective/train/weights_max": 1.278699278831482, "objective/train/weights_min": 0.3693629503250122, "theoretical_loss": 3.529938285895858, "tokens_seen": 1572864000 }, { "epoch": 0.48, "learning_rate": 0.0005286470871449207, "loss": 0.0723, "theoretical_loss": 3.529938285895858, "tokens_seen": 1572864000 }, { "epoch": 0.48, "learning_rate": 0.0005285668432033382, "loss": 0.0671, "theoretical_loss": 3.529887304510326, "tokens_seen": 1573126144 }, { "epoch": 0.48, "learning_rate": 0.0005284865992617558, "loss": 0.0665, "theoretical_loss": 3.5298363339978502, "tokens_seen": 1573388288 }, { "epoch": 0.48, "learning_rate": 0.0005284063553201734, "loss": 0.0673, "theoretical_loss": 3.529785374354302, "tokens_seen": 1573650432 }, { "epoch": 0.48, "learning_rate": 0.0005283261113785909, "loss": 0.0687, "theoretical_loss": 3.529734425575553, "tokens_seen": 1573912576 }, { "epoch": 0.48, "learning_rate": 0.0005282458674370085, "loss": 0.0688, "theoretical_loss": 3.5296834876574783, "tokens_seen": 1574174720 }, { "epoch": 0.48, "learning_rate": 0.0005281656234954261, "loss": 0.0702, "theoretical_loss": 3.529632560595954, "tokens_seen": 1574436864 }, { "epoch": 0.48, "learning_rate": 0.0005280853795538437, "loss": 0.0676, "theoretical_loss": 3.5295816443868593, "tokens_seen": 1574699008 }, { "epoch": 0.48, "learning_rate": 0.0005280051356122612, "loss": 0.0643, "theoretical_loss": 3.529530739026076, "tokens_seen": 1574961152 }, { "epoch": 0.48, "learning_rate": 0.000527924891670679, "loss": 0.0678, "theoretical_loss": 3.5294798445094875, "tokens_seen": 1575223296 }, { "epoch": 0.48, "learning_rate": 0.0005278446477290965, "loss": 0.0702, "theoretical_loss": 3.529428960832979, "tokens_seen": 1575485440 }, { "epoch": 0.48, "learning_rate": 0.000527764403787514, "loss": 0.0679, "theoretical_loss": 3.5293780879924395, "tokens_seen": 1575747584 }, { "epoch": 0.48, "learning_rate": 0.0005276841598459317, "loss": 0.0664, "theoretical_loss": 3.529327225983759, "tokens_seen": 1576009728 }, { "epoch": 0.48, "learning_rate": 0.0005276039159043492, "loss": 0.0683, "theoretical_loss": 3.5292763748028295, "tokens_seen": 1576271872 }, { "epoch": 0.48, "learning_rate": 0.0005275236719627668, "loss": 0.0684, "theoretical_loss": 3.5292255344455468, "tokens_seen": 1576534016 }, { "epoch": 0.48, "learning_rate": 0.0005274434280211844, "loss": 0.0677, "theoretical_loss": 3.529174704907807, "tokens_seen": 1576796160 }, { "epoch": 0.48, "learning_rate": 0.000527363184079602, "loss": 0.069, "theoretical_loss": 3.529123886185509, "tokens_seen": 1577058304 }, { "epoch": 0.48, "learning_rate": 0.0005272829401380196, "loss": 0.0671, "theoretical_loss": 3.529073078274556, "tokens_seen": 1577320448 }, { "epoch": 0.48, "learning_rate": 0.0005272026961964371, "loss": 0.0679, "theoretical_loss": 3.5290222811708505, "tokens_seen": 1577582592 }, { "epoch": 0.48, "learning_rate": 0.0005271224522548548, "loss": 0.0703, "theoretical_loss": 3.528971494870299, "tokens_seen": 1577844736 }, { "epoch": 0.48, "learning_rate": 0.0005270422083132724, "loss": 0.0663, "theoretical_loss": 3.5289207193688092, "tokens_seen": 1578106880 }, { "epoch": 0.48, "learning_rate": 0.00052696196437169, "loss": 0.0671, "theoretical_loss": 3.5288699546622913, "tokens_seen": 1578369024 }, { "epoch": 0.48, "learning_rate": 0.0005268817204301075, "loss": 0.07, "theoretical_loss": 3.528819200746659, "tokens_seen": 1578631168 }, { "epoch": 0.48, "learning_rate": 0.0005268014764885252, "loss": 0.0709, "theoretical_loss": 3.5287684576178258, "tokens_seen": 1578893312 }, { "epoch": 0.48, "learning_rate": 0.0005267212325469427, "loss": 0.0711, "theoretical_loss": 3.5287177252717097, "tokens_seen": 1579155456 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0015256733167916536, "objective/train/docs_used": 576827, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.413283348083496, "objective/train/original_loss": 1.413283348083496, "objective/train/theoretical_loss": 3.5286670037042303, "objective/train/tokens_used": 1599877600, "objective/train/value_avg": -0.007694244384765625, "objective/train/value_loss": 0.00026877259369939566, "objective/train/value_max": -8.481740951538086e-05, "objective/train/value_min": -0.9794921875, "objective/train/value_reward_corr": 0.7313708202021553, "objective/train/value_std": 0.0199737548828125, "objective/train/weight_avg": 1.0016505718231201, "objective/train/weighted_lm_loss": 1.415053367614746, "objective/train/weights_max": 2.1128692626953125, "objective/train/weights_min": 0.3688335716724396, "theoretical_loss": 3.5286670037042303, "tokens_seen": 1579417600 }, { "epoch": 0.48, "learning_rate": 0.0005266409886053603, "loss": 0.07, "theoretical_loss": 3.5286670037042303, "tokens_seen": 1579417600 }, { "epoch": 0.48, "learning_rate": 0.0005265607446637779, "loss": 0.0683, "theoretical_loss": 3.528616292911309, "tokens_seen": 1579679744 }, { "epoch": 0.48, "learning_rate": 0.0005264805007221954, "loss": 0.07, "theoretical_loss": 3.5285655928888686, "tokens_seen": 1579941888 }, { "epoch": 0.48, "learning_rate": 0.000526400256780613, "loss": 0.0688, "theoretical_loss": 3.5285149036328356, "tokens_seen": 1580204032 }, { "epoch": 0.48, "learning_rate": 0.0005263200128390307, "loss": 0.0696, "theoretical_loss": 3.528464225139139, "tokens_seen": 1580466176 }, { "epoch": 0.48, "learning_rate": 0.0005262397688974483, "loss": 0.0731, "theoretical_loss": 3.5284135574037085, "tokens_seen": 1580728320 }, { "epoch": 0.48, "learning_rate": 0.0005261595249558658, "loss": 0.0699, "theoretical_loss": 3.5283629004224766, "tokens_seen": 1580990464 }, { "epoch": 0.48, "learning_rate": 0.0005260792810142835, "loss": 0.0709, "theoretical_loss": 3.5283122541913787, "tokens_seen": 1581252608 }, { "epoch": 0.48, "learning_rate": 0.000525999037072701, "loss": 0.0687, "theoretical_loss": 3.5282616187063516, "tokens_seen": 1581514752 }, { "epoch": 0.48, "learning_rate": 0.0005259187931311186, "loss": 0.0686, "theoretical_loss": 3.528210993963334, "tokens_seen": 1581776896 }, { "epoch": 0.48, "learning_rate": 0.0005258385491895362, "loss": 0.0714, "theoretical_loss": 3.528160379958268, "tokens_seen": 1582039040 }, { "epoch": 0.48, "learning_rate": 0.0005257583052479537, "loss": 0.0707, "theoretical_loss": 3.528109776687097, "tokens_seen": 1582301184 }, { "epoch": 0.48, "learning_rate": 0.0005256780613063715, "loss": 0.0721, "theoretical_loss": 3.528059184145767, "tokens_seen": 1582563328 }, { "epoch": 0.48, "learning_rate": 0.000525597817364789, "loss": 0.0704, "theoretical_loss": 3.5280086023302264, "tokens_seen": 1582825472 }, { "epoch": 0.48, "learning_rate": 0.0005255175734232066, "loss": 0.0687, "theoretical_loss": 3.527958031236425, "tokens_seen": 1583087616 }, { "epoch": 0.48, "learning_rate": 0.0005254373294816242, "loss": 0.0693, "theoretical_loss": 3.527907470860315, "tokens_seen": 1583349760 }, { "epoch": 0.48, "learning_rate": 0.0005253570855400417, "loss": 0.0709, "theoretical_loss": 3.527856921197852, "tokens_seen": 1583611904 }, { "epoch": 0.48, "learning_rate": 0.0005252768415984593, "loss": 0.0709, "theoretical_loss": 3.5278063822449925, "tokens_seen": 1583874048 }, { "epoch": 0.48, "learning_rate": 0.0005251965976568769, "loss": 0.0699, "theoretical_loss": 3.5277558539976956, "tokens_seen": 1584136192 }, { "epoch": 0.48, "learning_rate": 0.0005251163537152945, "loss": 0.0702, "theoretical_loss": 3.5277053364519215, "tokens_seen": 1584398336 }, { "epoch": 0.48, "learning_rate": 0.000525036109773712, "loss": 0.0707, "theoretical_loss": 3.5276548296036356, "tokens_seen": 1584660480 }, { "epoch": 0.48, "learning_rate": 0.0005249558658321298, "loss": 0.0684, "theoretical_loss": 3.527604333448802, "tokens_seen": 1584922624 }, { "epoch": 0.48, "learning_rate": 0.0005248756218905473, "loss": 0.0687, "theoretical_loss": 3.527553847983389, "tokens_seen": 1585184768 }, { "epoch": 0.48, "learning_rate": 0.0005247953779489648, "loss": 0.07, "theoretical_loss": 3.5275033732033667, "tokens_seen": 1585446912 }, { "epoch": 0.48, "learning_rate": 0.0005247151340073825, "loss": 0.067, "theoretical_loss": 3.5274529091047073, "tokens_seen": 1585709056 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.00030591359245590866, "objective/train/docs_used": 579278, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2970331907272339, "objective/train/original_loss": 1.2970331907272339, "objective/train/theoretical_loss": 3.527402455683385, "objective/train/tokens_used": 1606431200, "objective/train/value_avg": -0.00768280029296875, "objective/train/value_loss": 0.00016412846161983907, "objective/train/value_max": -5.608797073364258e-05, "objective/train/value_min": -0.393310546875, "objective/train/value_reward_corr": 0.7478747737276108, "objective/train/value_std": 0.013519287109375, "objective/train/weight_avg": 1.000381588935852, "objective/train/weighted_lm_loss": 1.2976466417312622, "objective/train/weights_max": 1.1971355676651, "objective/train/weights_min": 0.36882299184799194, "theoretical_loss": 3.527402455683385, "tokens_seen": 1585971200 }, { "epoch": 0.48, "learning_rate": 0.0005246348900658, "loss": 0.0679, "theoretical_loss": 3.527402455683385, "tokens_seen": 1585971200 }, { "epoch": 0.48, "learning_rate": 0.0005245546461242177, "loss": 0.0697, "theoretical_loss": 3.527352012935377, "tokens_seen": 1586233344 }, { "epoch": 0.48, "learning_rate": 0.0005244744021826352, "loss": 0.0696, "theoretical_loss": 3.527301580856661, "tokens_seen": 1586495488 }, { "epoch": 0.48, "learning_rate": 0.0005243941582410528, "loss": 0.0702, "theoretical_loss": 3.527251159443219, "tokens_seen": 1586757632 }, { "epoch": 0.48, "learning_rate": 0.0005243139142994704, "loss": 0.0722, "theoretical_loss": 3.5272007486910333, "tokens_seen": 1587019776 }, { "epoch": 0.48, "learning_rate": 0.0005242336703578879, "loss": 0.0701, "theoretical_loss": 3.5271503485960896, "tokens_seen": 1587281920 }, { "epoch": 0.48, "learning_rate": 0.0005241534264163056, "loss": 0.0694, "theoretical_loss": 3.5270999591543752, "tokens_seen": 1587544064 }, { "epoch": 0.48, "learning_rate": 0.0005240731824747232, "loss": 0.0685, "theoretical_loss": 3.5270495803618793, "tokens_seen": 1587806208 }, { "epoch": 0.48, "learning_rate": 0.0005239929385331408, "loss": 0.0656, "theoretical_loss": 3.5269992122145952, "tokens_seen": 1588068352 }, { "epoch": 0.48, "learning_rate": 0.0005239126945915583, "loss": 0.0678, "theoretical_loss": 3.526948854708515, "tokens_seen": 1588330496 }, { "epoch": 0.48, "learning_rate": 0.000523832450649976, "loss": 0.0688, "theoretical_loss": 3.526898507839636, "tokens_seen": 1588592640 }, { "epoch": 0.48, "learning_rate": 0.0005237522067083935, "loss": 0.0674, "theoretical_loss": 3.526848171603956, "tokens_seen": 1588854784 }, { "epoch": 0.48, "learning_rate": 0.0005236719627668111, "loss": 0.0706, "theoretical_loss": 3.526797845997476, "tokens_seen": 1589116928 }, { "epoch": 0.48, "learning_rate": 0.0005235917188252287, "loss": 0.0669, "theoretical_loss": 3.5267475310161984, "tokens_seen": 1589379072 }, { "epoch": 0.48, "learning_rate": 0.0005235114748836462, "loss": 0.0713, "theoretical_loss": 3.5266972266561276, "tokens_seen": 1589641216 }, { "epoch": 0.48, "learning_rate": 0.000523431230942064, "loss": 0.0673, "theoretical_loss": 3.526646932913271, "tokens_seen": 1589903360 }, { "epoch": 0.48, "learning_rate": 0.0005233509870004815, "loss": 0.0668, "theoretical_loss": 3.5265966497836376, "tokens_seen": 1590165504 }, { "epoch": 0.48, "learning_rate": 0.0005232707430588991, "loss": 0.0726, "theoretical_loss": 3.5265463772632386, "tokens_seen": 1590427648 }, { "epoch": 0.48, "learning_rate": 0.0005231904991173167, "loss": 0.0681, "theoretical_loss": 3.5264961153480874, "tokens_seen": 1590689792 }, { "epoch": 0.48, "learning_rate": 0.0005231102551757343, "loss": 0.0706, "theoretical_loss": 3.5264458640342, "tokens_seen": 1590951936 }, { "epoch": 0.48, "learning_rate": 0.0005230300112341518, "loss": 0.0701, "theoretical_loss": 3.5263956233175935, "tokens_seen": 1591214080 }, { "epoch": 0.48, "learning_rate": 0.0005229497672925694, "loss": 0.067, "theoretical_loss": 3.5263453931942883, "tokens_seen": 1591476224 }, { "epoch": 0.48, "learning_rate": 0.000522869523350987, "loss": 0.0674, "theoretical_loss": 3.5262951736603063, "tokens_seen": 1591738368 }, { "epoch": 0.48, "learning_rate": 0.0005227892794094045, "loss": 0.069, "theoretical_loss": 3.5262449647116716, "tokens_seen": 1592000512 }, { "epoch": 0.48, "learning_rate": 0.0005227090354678223, "loss": 0.0665, "theoretical_loss": 3.5261947663444104, "tokens_seen": 1592262656 }, { "epoch": 0.48, "objective/train/advantage_avg": -0.0003691379679366946, "objective/train/docs_used": 581545, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4430354833602905, "objective/train/original_loss": 1.44303560256958, "objective/train/theoretical_loss": 3.5261445785545513, "objective/train/tokens_used": 1612984800, "objective/train/value_avg": -0.006046295166015625, "objective/train/value_loss": 0.000189928337931633, "objective/train/value_max": -3.534555435180664e-05, "objective/train/value_min": -0.60888671875, "objective/train/value_reward_corr": 0.6734293856838923, "objective/train/value_std": 0.011077880859375, "objective/train/weight_avg": 0.9997175931930542, "objective/train/weighted_lm_loss": 1.442589521408081, "objective/train/weights_max": 1.4511600732803345, "objective/train/weights_min": 0.3682490885257721, "theoretical_loss": 3.5261445785545513, "tokens_seen": 1592524800 }, { "epoch": 0.48, "learning_rate": 0.0005226287915262398, "loss": 0.0679, "theoretical_loss": 3.5261445785545513, "tokens_seen": 1592524800 }, { "epoch": 0.48, "learning_rate": 0.0005225485475846574, "loss": 0.069, "theoretical_loss": 3.526094401338125, "tokens_seen": 1592786944 }, { "epoch": 0.48, "learning_rate": 0.000522468303643075, "loss": 0.0695, "theoretical_loss": 3.526044234691165, "tokens_seen": 1593049088 }, { "epoch": 0.48, "learning_rate": 0.0005223880597014925, "loss": 0.0681, "theoretical_loss": 3.525994078609705, "tokens_seen": 1593311232 }, { "epoch": 0.48, "learning_rate": 0.0005223078157599101, "loss": 0.0694, "theoretical_loss": 3.525943933089782, "tokens_seen": 1593573376 }, { "epoch": 0.48, "learning_rate": 0.0005222275718183277, "loss": 0.0716, "theoretical_loss": 3.5258937981274365, "tokens_seen": 1593835520 }, { "epoch": 0.48, "learning_rate": 0.0005221473278767453, "loss": 0.0689, "theoretical_loss": 3.5258436737187084, "tokens_seen": 1594097664 }, { "epoch": 0.48, "learning_rate": 0.0005220670839351629, "loss": 0.0673, "theoretical_loss": 3.5257935598596424, "tokens_seen": 1594359808 }, { "epoch": 0.48, "learning_rate": 0.0005219868399935806, "loss": 0.0676, "theoretical_loss": 3.5257434565462833, "tokens_seen": 1594621952 }, { "epoch": 0.48, "learning_rate": 0.0005219065960519981, "loss": 0.0695, "theoretical_loss": 3.5256933637746792, "tokens_seen": 1594884096 }, { "epoch": 0.48, "learning_rate": 0.0005218263521104157, "loss": 0.0652, "theoretical_loss": 3.5256432815408796, "tokens_seen": 1595146240 }, { "epoch": 0.48, "learning_rate": 0.0005217461081688333, "loss": 0.0661, "theoretical_loss": 3.5255932098409364, "tokens_seen": 1595408384 }, { "epoch": 0.48, "learning_rate": 0.0005216658642272508, "loss": 0.068, "theoretical_loss": 3.5255431486709043, "tokens_seen": 1595670528 }, { "epoch": 0.48, "learning_rate": 0.0005215856202856685, "loss": 0.0651, "theoretical_loss": 3.525493098026839, "tokens_seen": 1595932672 }, { "epoch": 0.48, "learning_rate": 0.000521505376344086, "loss": 0.0693, "theoretical_loss": 3.5254430579047993, "tokens_seen": 1596194816 }, { "epoch": 0.48, "learning_rate": 0.0005214251324025036, "loss": 0.0658, "theoretical_loss": 3.5253930283008454, "tokens_seen": 1596456960 }, { "epoch": 0.48, "learning_rate": 0.0005213448884609212, "loss": 0.069, "theoretical_loss": 3.5253430092110403, "tokens_seen": 1596719104 }, { "epoch": 0.48, "learning_rate": 0.0005212646445193387, "loss": 0.067, "theoretical_loss": 3.5252930006314482, "tokens_seen": 1596981248 }, { "epoch": 0.48, "learning_rate": 0.0005211844005777564, "loss": 0.0678, "theoretical_loss": 3.5252430025581356, "tokens_seen": 1597243392 }, { "epoch": 0.48, "learning_rate": 0.000521104156636174, "loss": 0.0679, "theoretical_loss": 3.5251930149871726, "tokens_seen": 1597505536 }, { "epoch": 0.48, "learning_rate": 0.0005210239126945916, "loss": 0.0675, "theoretical_loss": 3.5251430379146296, "tokens_seen": 1597767680 }, { "epoch": 0.48, "learning_rate": 0.0005209436687530092, "loss": 0.0674, "theoretical_loss": 3.5250930713365802, "tokens_seen": 1598029824 }, { "epoch": 0.48, "learning_rate": 0.0005208634248114268, "loss": 0.0656, "theoretical_loss": 3.525043115249099, "tokens_seen": 1598291968 }, { "epoch": 0.48, "learning_rate": 0.0005207831808698443, "loss": 0.0699, "theoretical_loss": 3.5249931696482637, "tokens_seen": 1598554112 }, { "epoch": 0.48, "learning_rate": 0.0005207029369282619, "loss": 0.0698, "theoretical_loss": 3.524943234530154, "tokens_seen": 1598816256 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0008163555758073926, "objective/train/docs_used": 583998, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3744968175888062, "objective/train/original_loss": 1.3744966983795166, "objective/train/theoretical_loss": 3.5248933098908517, "objective/train/tokens_used": 1619538400, "objective/train/value_avg": -0.00650787353515625, "objective/train/value_loss": 0.000182785777724348, "objective/train/value_max": -3.24249267578125e-05, "objective/train/value_min": -0.49658203125, "objective/train/value_reward_corr": 0.6320282058663177, "objective/train/value_std": 0.0113067626953125, "objective/train/weight_avg": 1.0009000301361084, "objective/train/weighted_lm_loss": 1.3754725456237793, "objective/train/weights_max": 1.3192977905273438, "objective/train/weights_min": 0.3684886395931244, "theoretical_loss": 3.5248933098908517, "tokens_seen": 1599078400 }, { "epoch": 0.48, "learning_rate": 0.0005206226929866795, "loss": 0.0687, "theoretical_loss": 3.5248933098908517, "tokens_seen": 1599078400 }, { "epoch": 0.48, "learning_rate": 0.000520542449045097, "loss": 0.0682, "theoretical_loss": 3.5248433957264402, "tokens_seen": 1599340544 }, { "epoch": 0.48, "learning_rate": 0.0005204622051035148, "loss": 0.0703, "theoretical_loss": 3.5247934920330053, "tokens_seen": 1599602688 }, { "epoch": 0.48, "learning_rate": 0.0005203819611619323, "loss": 0.0702, "theoretical_loss": 3.5247435988066353, "tokens_seen": 1599864832 }, { "epoch": 0.48, "learning_rate": 0.0005203017172203499, "loss": 0.0694, "theoretical_loss": 3.52469371604342, "tokens_seen": 1600126976 }, { "epoch": 0.48, "learning_rate": 0.0005202214732787675, "loss": 0.0692, "theoretical_loss": 3.524643843739452, "tokens_seen": 1600389120 }, { "epoch": 0.49, "learning_rate": 0.0005201412293371851, "loss": 0.0727, "theoretical_loss": 3.524593981890825, "tokens_seen": 1600651264 }, { "epoch": 0.49, "learning_rate": 0.0005200609853956026, "loss": 0.069, "theoretical_loss": 3.524544130493635, "tokens_seen": 1600913408 }, { "epoch": 0.49, "learning_rate": 0.0005199807414540202, "loss": 0.0686, "theoretical_loss": 3.5244942895439815, "tokens_seen": 1601175552 }, { "epoch": 0.49, "learning_rate": 0.0005199004975124378, "loss": 0.069, "theoretical_loss": 3.524444459037965, "tokens_seen": 1601437696 }, { "epoch": 0.49, "learning_rate": 0.0005198202535708553, "loss": 0.0686, "theoretical_loss": 3.5243946389716867, "tokens_seen": 1601699840 }, { "epoch": 0.49, "learning_rate": 0.000519740009629273, "loss": 0.0704, "theoretical_loss": 3.5243448293412527, "tokens_seen": 1601961984 }, { "epoch": 0.49, "learning_rate": 0.0005196597656876906, "loss": 0.0705, "theoretical_loss": 3.5242950301427696, "tokens_seen": 1602224128 }, { "epoch": 0.49, "learning_rate": 0.0005195795217461083, "loss": 0.0675, "theoretical_loss": 3.5242452413723457, "tokens_seen": 1602486272 }, { "epoch": 0.49, "learning_rate": 0.0005194992778045258, "loss": 0.069, "theoretical_loss": 3.5241954630260923, "tokens_seen": 1602748416 }, { "epoch": 0.49, "learning_rate": 0.0005194190338629433, "loss": 0.0653, "theoretical_loss": 3.524145695100123, "tokens_seen": 1603010560 }, { "epoch": 0.49, "learning_rate": 0.000519338789921361, "loss": 0.0685, "theoretical_loss": 3.5240959375905527, "tokens_seen": 1603272704 }, { "epoch": 0.49, "learning_rate": 0.0005192585459797785, "loss": 0.0689, "theoretical_loss": 3.5240461904934977, "tokens_seen": 1603534848 }, { "epoch": 0.49, "learning_rate": 0.0005191783020381961, "loss": 0.0696, "theoretical_loss": 3.5239964538050788, "tokens_seen": 1603796992 }, { "epoch": 0.49, "learning_rate": 0.0005190980580966137, "loss": 0.0691, "theoretical_loss": 3.5239467275214165, "tokens_seen": 1604059136 }, { "epoch": 0.49, "learning_rate": 0.0005190178141550314, "loss": 0.0707, "theoretical_loss": 3.523897011638635, "tokens_seen": 1604321280 }, { "epoch": 0.49, "learning_rate": 0.0005189375702134489, "loss": 0.069, "theoretical_loss": 3.5238473061528586, "tokens_seen": 1604583424 }, { "epoch": 0.49, "learning_rate": 0.0005188573262718665, "loss": 0.066, "theoretical_loss": 3.5237976110602163, "tokens_seen": 1604845568 }, { "epoch": 0.49, "learning_rate": 0.0005187770823302841, "loss": 0.0714, "theoretical_loss": 3.523747926356837, "tokens_seen": 1605107712 }, { "epoch": 0.49, "learning_rate": 0.0005186968383887016, "loss": 0.0684, "theoretical_loss": 3.523698252038853, "tokens_seen": 1605369856 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0014611550141125917, "objective/train/docs_used": 586391, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3702470064163208, "objective/train/original_loss": 1.3702466487884521, "objective/train/theoretical_loss": 3.5236485881023976, "objective/train/tokens_used": 1626092000, "objective/train/value_avg": -0.006923675537109375, "objective/train/value_loss": 0.00013820224558003247, "objective/train/value_max": -2.6464462280273438e-05, "objective/train/value_min": -0.95068359375, "objective/train/value_reward_corr": 0.6985552108151838, "objective/train/value_std": 0.01245880126953125, "objective/train/weight_avg": 1.0015236139297485, "objective/train/weighted_lm_loss": 1.3721559047698975, "objective/train/weights_max": 1.1226495504379272, "objective/train/weights_min": 0.3682887554168701, "theoretical_loss": 3.5236485881023976, "tokens_seen": 1605632000 }, { "epoch": 0.49, "learning_rate": 0.0005186165944471193, "loss": 0.0676, "theoretical_loss": 3.5236485881023976, "tokens_seen": 1605632000 }, { "epoch": 0.49, "learning_rate": 0.0005185363505055368, "loss": 0.0671, "theoretical_loss": 3.5235989345436076, "tokens_seen": 1605894144 }, { "epoch": 0.49, "learning_rate": 0.0005184561065639545, "loss": 0.0693, "theoretical_loss": 3.52354929135862, "tokens_seen": 1606156288 }, { "epoch": 0.49, "learning_rate": 0.000518375862622372, "loss": 0.0665, "theoretical_loss": 3.5234996585435754, "tokens_seen": 1606418432 }, { "epoch": 0.49, "learning_rate": 0.0005182956186807895, "loss": 0.0686, "theoretical_loss": 3.5234500360946157, "tokens_seen": 1606680576 }, { "epoch": 0.49, "learning_rate": 0.0005182153747392073, "loss": 0.0663, "theoretical_loss": 3.5234004240078853, "tokens_seen": 1606942720 }, { "epoch": 0.49, "learning_rate": 0.0005181351307976248, "loss": 0.069, "theoretical_loss": 3.5233508222795304, "tokens_seen": 1607204864 }, { "epoch": 0.49, "learning_rate": 0.0005180548868560424, "loss": 0.0673, "theoretical_loss": 3.5233012309057, "tokens_seen": 1607467008 }, { "epoch": 0.49, "learning_rate": 0.00051797464291446, "loss": 0.0685, "theoretical_loss": 3.5232516498825426, "tokens_seen": 1607729152 }, { "epoch": 0.49, "learning_rate": 0.0005178943989728776, "loss": 0.0706, "theoretical_loss": 3.5232020792062126, "tokens_seen": 1607991296 }, { "epoch": 0.49, "learning_rate": 0.0005178141550312951, "loss": 0.0671, "theoretical_loss": 3.523152518872864, "tokens_seen": 1608253440 }, { "epoch": 0.49, "learning_rate": 0.0005177339110897127, "loss": 0.0694, "theoretical_loss": 3.5231029688786526, "tokens_seen": 1608515584 }, { "epoch": 0.49, "learning_rate": 0.0005176536671481303, "loss": 0.0692, "theoretical_loss": 3.523053429219738, "tokens_seen": 1608777728 }, { "epoch": 0.49, "learning_rate": 0.0005175734232065478, "loss": 0.0743, "theoretical_loss": 3.52300389989228, "tokens_seen": 1609039872 }, { "epoch": 0.49, "learning_rate": 0.0005174931792649656, "loss": 0.0716, "theoretical_loss": 3.5229543808924415, "tokens_seen": 1609302016 }, { "epoch": 0.49, "learning_rate": 0.0005174129353233831, "loss": 0.0684, "theoretical_loss": 3.522904872216388, "tokens_seen": 1609564160 }, { "epoch": 0.49, "learning_rate": 0.0005173326913818007, "loss": 0.0689, "theoretical_loss": 3.5228553738602857, "tokens_seen": 1609826304 }, { "epoch": 0.49, "learning_rate": 0.0005172524474402183, "loss": 0.0705, "theoretical_loss": 3.5228058858203033, "tokens_seen": 1610088448 }, { "epoch": 0.49, "learning_rate": 0.0005171722034986359, "loss": 0.0704, "theoretical_loss": 3.5227564080926115, "tokens_seen": 1610350592 }, { "epoch": 0.49, "learning_rate": 0.0005170919595570535, "loss": 0.0698, "theoretical_loss": 3.5227069406733844, "tokens_seen": 1610612736 }, { "epoch": 0.49, "learning_rate": 0.000517011715615471, "loss": 0.0708, "theoretical_loss": 3.5226574835587963, "tokens_seen": 1610874880 }, { "epoch": 0.49, "learning_rate": 0.0005169314716738886, "loss": 0.0676, "theoretical_loss": 3.522608036745024, "tokens_seen": 1611137024 }, { "epoch": 0.49, "learning_rate": 0.0005168512277323062, "loss": 0.0698, "theoretical_loss": 3.5225586002282467, "tokens_seen": 1611399168 }, { "epoch": 0.49, "learning_rate": 0.0005167709837907239, "loss": 0.0702, "theoretical_loss": 3.5225091740046457, "tokens_seen": 1611661312 }, { "epoch": 0.49, "learning_rate": 0.0005166907398491414, "loss": 0.072, "theoretical_loss": 3.5224597580704033, "tokens_seen": 1611923456 }, { "epoch": 0.49, "objective/train/advantage_avg": -0.0002596320991870016, "objective/train/docs_used": 588885, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3582322597503662, "objective/train/original_loss": 1.3582322597503662, "objective/train/theoretical_loss": 3.5224103524217067, "objective/train/tokens_used": 1632645600, "objective/train/value_avg": -0.008026123046875, "objective/train/value_loss": 0.0002943206636700779, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.91357421875, "objective/train/value_reward_corr": 0.7926579179126808, "objective/train/value_std": 0.0210723876953125, "objective/train/weight_avg": 0.9998724460601807, "objective/train/weighted_lm_loss": 1.3586115837097168, "objective/train/weights_max": 1.6367690563201904, "objective/train/weights_min": 0.3682592809200287, "theoretical_loss": 3.5224103524217067, "tokens_seen": 1612185600 }, { "epoch": 0.49, "learning_rate": 0.0005166104959075591, "loss": 0.0693, "theoretical_loss": 3.5224103524217067, "tokens_seen": 1612185600 }, { "epoch": 0.49, "learning_rate": 0.0005165302519659766, "loss": 0.0705, "theoretical_loss": 3.522360957054741, "tokens_seen": 1612447744 }, { "epoch": 0.49, "learning_rate": 0.0005164500080243941, "loss": 0.0669, "theoretical_loss": 3.5223115719656963, "tokens_seen": 1612709888 }, { "epoch": 0.49, "learning_rate": 0.0005163697640828118, "loss": 0.0705, "theoretical_loss": 3.5222621971507646, "tokens_seen": 1612972032 }, { "epoch": 0.49, "learning_rate": 0.0005162895201412293, "loss": 0.0695, "theoretical_loss": 3.522212832606138, "tokens_seen": 1613234176 }, { "epoch": 0.49, "learning_rate": 0.0005162092761996469, "loss": 0.0714, "theoretical_loss": 3.5221634783280122, "tokens_seen": 1613496320 }, { "epoch": 0.49, "learning_rate": 0.0005161290322580645, "loss": 0.0691, "theoretical_loss": 3.5221141343125852, "tokens_seen": 1613758464 }, { "epoch": 0.49, "learning_rate": 0.0005160487883164821, "loss": 0.0713, "theoretical_loss": 3.5220648005560555, "tokens_seen": 1614020608 }, { "epoch": 0.49, "learning_rate": 0.0005159685443748997, "loss": 0.0748, "theoretical_loss": 3.522015477054625, "tokens_seen": 1614282752 }, { "epoch": 0.49, "learning_rate": 0.0005158883004333173, "loss": 0.0729, "theoretical_loss": 3.521966163804497, "tokens_seen": 1614544896 }, { "epoch": 0.49, "learning_rate": 0.0005158080564917349, "loss": 0.0708, "theoretical_loss": 3.521916860801877, "tokens_seen": 1614807040 }, { "epoch": 0.49, "learning_rate": 0.0005157278125501525, "loss": 0.0704, "theoretical_loss": 3.521867568042973, "tokens_seen": 1615069184 }, { "epoch": 0.49, "learning_rate": 0.0005156475686085701, "loss": 0.07, "theoretical_loss": 3.5218182855239935, "tokens_seen": 1615331328 }, { "epoch": 0.49, "learning_rate": 0.0005155673246669876, "loss": 0.0683, "theoretical_loss": 3.5217690132411508, "tokens_seen": 1615593472 }, { "epoch": 0.49, "learning_rate": 0.0005154870807254053, "loss": 0.0708, "theoretical_loss": 3.521719751190658, "tokens_seen": 1615855616 }, { "epoch": 0.49, "learning_rate": 0.0005154068367838228, "loss": 0.0696, "theoretical_loss": 3.5216704993687307, "tokens_seen": 1616117760 }, { "epoch": 0.49, "learning_rate": 0.0005153265928422403, "loss": 0.0694, "theoretical_loss": 3.5216212577715873, "tokens_seen": 1616379904 }, { "epoch": 0.49, "learning_rate": 0.0005152463489006581, "loss": 0.0711, "theoretical_loss": 3.5215720263954458, "tokens_seen": 1616642048 }, { "epoch": 0.49, "learning_rate": 0.0005151661049590756, "loss": 0.0692, "theoretical_loss": 3.521522805236529, "tokens_seen": 1616904192 }, { "epoch": 0.49, "learning_rate": 0.0005150858610174932, "loss": 0.0725, "theoretical_loss": 3.52147359429106, "tokens_seen": 1617166336 }, { "epoch": 0.49, "learning_rate": 0.0005150056170759108, "loss": 0.0708, "theoretical_loss": 3.5214243935552654, "tokens_seen": 1617428480 }, { "epoch": 0.49, "learning_rate": 0.0005149253731343284, "loss": 0.0664, "theoretical_loss": 3.5213752030253715, "tokens_seen": 1617690624 }, { "epoch": 0.49, "learning_rate": 0.0005148451291927459, "loss": 0.07, "theoretical_loss": 3.5213260226976084, "tokens_seen": 1617952768 }, { "epoch": 0.49, "learning_rate": 0.0005147648852511635, "loss": 0.0702, "theoretical_loss": 3.5212768525682074, "tokens_seen": 1618214912 }, { "epoch": 0.49, "learning_rate": 0.0005146846413095811, "loss": 0.0714, "theoretical_loss": 3.5212276926334027, "tokens_seen": 1618477056 }, { "epoch": 0.49, "objective/train/advantage_avg": -8.634123514639214e-05, "objective/train/docs_used": 591377, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2014659643173218, "objective/train/original_loss": 1.2014658451080322, "objective/train/theoretical_loss": 3.5211785428894293, "objective/train/tokens_used": 1639199200, "objective/train/value_avg": -0.007335662841796875, "objective/train/value_loss": 0.00014820354408584535, "objective/train/value_max": -2.777576446533203e-05, "objective/train/value_min": -0.5302734375, "objective/train/value_reward_corr": 0.7609983102667354, "objective/train/value_std": 0.0133056640625, "objective/train/weight_avg": 0.9999821782112122, "objective/train/weighted_lm_loss": 1.2018018960952759, "objective/train/weights_max": 1.1847867965698242, "objective/train/weights_min": 0.3695588707923889, "theoretical_loss": 3.5211785428894293, "tokens_seen": 1618739200 }, { "epoch": 0.49, "learning_rate": 0.0005146043973679986, "loss": 0.0672, "theoretical_loss": 3.5211785428894293, "tokens_seen": 1618739200 }, { "epoch": 0.49, "learning_rate": 0.0005145241534264164, "loss": 0.0719, "theoretical_loss": 3.5211294033325258, "tokens_seen": 1619001344 }, { "epoch": 0.49, "learning_rate": 0.0005144439094848339, "loss": 0.0687, "theoretical_loss": 3.5210802739589306, "tokens_seen": 1619263488 }, { "epoch": 0.49, "learning_rate": 0.0005143636655432516, "loss": 0.0697, "theoretical_loss": 3.521031154764886, "tokens_seen": 1619525632 }, { "epoch": 0.49, "learning_rate": 0.0005142834216016691, "loss": 0.0707, "theoretical_loss": 3.520982045746636, "tokens_seen": 1619787776 }, { "epoch": 0.49, "learning_rate": 0.0005142031776600867, "loss": 0.0699, "theoretical_loss": 3.5209329469004254, "tokens_seen": 1620049920 }, { "epoch": 0.49, "learning_rate": 0.0005141229337185043, "loss": 0.0715, "theoretical_loss": 3.5208838582225024, "tokens_seen": 1620312064 }, { "epoch": 0.49, "learning_rate": 0.0005140426897769218, "loss": 0.0685, "theoretical_loss": 3.5208347797091157, "tokens_seen": 1620574208 }, { "epoch": 0.49, "learning_rate": 0.0005139624458353394, "loss": 0.0681, "theoretical_loss": 3.5207857113565177, "tokens_seen": 1620836352 }, { "epoch": 0.49, "learning_rate": 0.000513882201893757, "loss": 0.0692, "theoretical_loss": 3.520736653160962, "tokens_seen": 1621098496 }, { "epoch": 0.49, "learning_rate": 0.0005138019579521747, "loss": 0.0693, "theoretical_loss": 3.520687605118704, "tokens_seen": 1621360640 }, { "epoch": 0.49, "learning_rate": 0.0005137217140105922, "loss": 0.072, "theoretical_loss": 3.520638567226001, "tokens_seen": 1621622784 }, { "epoch": 0.49, "learning_rate": 0.0005136414700690099, "loss": 0.0697, "theoretical_loss": 3.5205895394791127, "tokens_seen": 1621884928 }, { "epoch": 0.49, "learning_rate": 0.0005135612261274274, "loss": 0.0686, "theoretical_loss": 3.5205405218743007, "tokens_seen": 1622147072 }, { "epoch": 0.49, "learning_rate": 0.0005134809821858449, "loss": 0.0702, "theoretical_loss": 3.520491514407828, "tokens_seen": 1622409216 }, { "epoch": 0.49, "learning_rate": 0.0005134007382442626, "loss": 0.0702, "theoretical_loss": 3.520442517075961, "tokens_seen": 1622671360 }, { "epoch": 0.49, "learning_rate": 0.0005133204943026801, "loss": 0.068, "theoretical_loss": 3.5203935298749656, "tokens_seen": 1622933504 }, { "epoch": 0.49, "learning_rate": 0.0005132402503610978, "loss": 0.0689, "theoretical_loss": 3.520344552801113, "tokens_seen": 1623195648 }, { "epoch": 0.49, "learning_rate": 0.0005131600064195153, "loss": 0.0704, "theoretical_loss": 3.5202955858506737, "tokens_seen": 1623457792 }, { "epoch": 0.49, "learning_rate": 0.000513079762477933, "loss": 0.0726, "theoretical_loss": 3.520246629019921, "tokens_seen": 1623719936 }, { "epoch": 0.49, "learning_rate": 0.0005129995185363506, "loss": 0.0694, "theoretical_loss": 3.5201976823051306, "tokens_seen": 1623982080 }, { "epoch": 0.49, "learning_rate": 0.0005129192745947681, "loss": 0.0711, "theoretical_loss": 3.5201487457025795, "tokens_seen": 1624244224 }, { "epoch": 0.49, "learning_rate": 0.0005128390306531857, "loss": 0.0694, "theoretical_loss": 3.5200998192085473, "tokens_seen": 1624506368 }, { "epoch": 0.49, "learning_rate": 0.0005127587867116033, "loss": 0.069, "theoretical_loss": 3.5200509028193148, "tokens_seen": 1624768512 }, { "epoch": 0.49, "learning_rate": 0.0005126785427700209, "loss": 0.0723, "theoretical_loss": 3.5200019965311657, "tokens_seen": 1625030656 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0010418968740850687, "objective/train/docs_used": 593743, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.481257438659668, "objective/train/original_loss": 1.481257438659668, "objective/train/theoretical_loss": 3.519953100340385, "objective/train/tokens_used": 1645752800, "objective/train/value_avg": -0.0069732666015625, "objective/train/value_loss": 0.00027382862754166126, "objective/train/value_max": -6.502866744995117e-05, "objective/train/value_min": -0.8955078125, "objective/train/value_reward_corr": 0.6842025943888881, "objective/train/value_std": 0.01538848876953125, "objective/train/weight_avg": 1.0011622905731201, "objective/train/weighted_lm_loss": 1.4833272695541382, "objective/train/weights_max": 1.6334962844848633, "objective/train/weights_min": 0.36918336153030396, "theoretical_loss": 3.519953100340385, "tokens_seen": 1625292800 }, { "epoch": 0.49, "learning_rate": 0.0005125982988284384, "loss": 0.0712, "theoretical_loss": 3.519953100340385, "tokens_seen": 1625292800 }, { "epoch": 0.49, "learning_rate": 0.0005125180548868561, "loss": 0.073, "theoretical_loss": 3.51990421424326, "tokens_seen": 1625554944 }, { "epoch": 0.49, "learning_rate": 0.0005124378109452736, "loss": 0.0703, "theoretical_loss": 3.519855338236079, "tokens_seen": 1625817088 }, { "epoch": 0.49, "learning_rate": 0.0005123575670036911, "loss": 0.0687, "theoretical_loss": 3.5198064723151345, "tokens_seen": 1626079232 }, { "epoch": 0.49, "learning_rate": 0.0005122773230621089, "loss": 0.0707, "theoretical_loss": 3.519757616476719, "tokens_seen": 1626341376 }, { "epoch": 0.49, "learning_rate": 0.0005121970791205264, "loss": 0.0693, "theoretical_loss": 3.519708770717126, "tokens_seen": 1626603520 }, { "epoch": 0.49, "learning_rate": 0.000512116835178944, "loss": 0.0718, "theoretical_loss": 3.519659935032655, "tokens_seen": 1626865664 }, { "epoch": 0.49, "learning_rate": 0.0005120365912373616, "loss": 0.0708, "theoretical_loss": 3.5196111094196034, "tokens_seen": 1627127808 }, { "epoch": 0.49, "learning_rate": 0.0005119563472957792, "loss": 0.0684, "theoretical_loss": 3.5195622938742726, "tokens_seen": 1627389952 }, { "epoch": 0.49, "learning_rate": 0.0005118761033541968, "loss": 0.0713, "theoretical_loss": 3.519513488392965, "tokens_seen": 1627652096 }, { "epoch": 0.49, "learning_rate": 0.0005117958594126143, "loss": 0.0706, "theoretical_loss": 3.5194646929719853, "tokens_seen": 1627914240 }, { "epoch": 0.49, "learning_rate": 0.0005117156154710319, "loss": 0.0706, "theoretical_loss": 3.519415907607641, "tokens_seen": 1628176384 }, { "epoch": 0.49, "learning_rate": 0.0005116353715294495, "loss": 0.0682, "theoretical_loss": 3.5193671322962397, "tokens_seen": 1628438528 }, { "epoch": 0.49, "learning_rate": 0.0005115551275878672, "loss": 0.0704, "theoretical_loss": 3.519318367034093, "tokens_seen": 1628700672 }, { "epoch": 0.49, "learning_rate": 0.0005114748836462847, "loss": 0.0693, "theoretical_loss": 3.519269611817513, "tokens_seen": 1628962816 }, { "epoch": 0.49, "learning_rate": 0.0005113946397047024, "loss": 0.0713, "theoretical_loss": 3.5192208666428146, "tokens_seen": 1629224960 }, { "epoch": 0.49, "learning_rate": 0.0005113143957631199, "loss": 0.0703, "theoretical_loss": 3.5191721315063136, "tokens_seen": 1629487104 }, { "epoch": 0.49, "learning_rate": 0.0005112341518215374, "loss": 0.0682, "theoretical_loss": 3.5191234064043293, "tokens_seen": 1629749248 }, { "epoch": 0.49, "learning_rate": 0.0005111539078799551, "loss": 0.0679, "theoretical_loss": 3.5190746913331816, "tokens_seen": 1630011392 }, { "epoch": 0.49, "learning_rate": 0.0005110736639383726, "loss": 0.0705, "theoretical_loss": 3.5190259862891926, "tokens_seen": 1630273536 }, { "epoch": 0.49, "learning_rate": 0.0005109934199967902, "loss": 0.0683, "theoretical_loss": 3.518977291268686, "tokens_seen": 1630535680 }, { "epoch": 0.49, "learning_rate": 0.0005109131760552078, "loss": 0.0708, "theoretical_loss": 3.5189286062679894, "tokens_seen": 1630797824 }, { "epoch": 0.49, "learning_rate": 0.0005108329321136255, "loss": 0.0697, "theoretical_loss": 3.51887993128343, "tokens_seen": 1631059968 }, { "epoch": 0.49, "learning_rate": 0.0005107526881720431, "loss": 0.0661, "theoretical_loss": 3.518831266311339, "tokens_seen": 1631322112 }, { "epoch": 0.49, "learning_rate": 0.0005106724442304607, "loss": 0.0705, "theoretical_loss": 3.518782611348046, "tokens_seen": 1631584256 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0007258797995746136, "objective/train/docs_used": 596193, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4411818981170654, "objective/train/original_loss": 1.4411818981170654, "objective/train/theoretical_loss": 3.5187339663898873, "objective/train/tokens_used": 1652306400, "objective/train/value_avg": -0.005550384521484375, "objective/train/value_loss": 0.00012820863048546016, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.69775390625, "objective/train/value_reward_corr": 0.6792193647270048, "objective/train/value_std": 0.01087188720703125, "objective/train/weight_avg": 1.0007840394973755, "objective/train/weighted_lm_loss": 1.4419759511947632, "objective/train/weights_max": 1.2253751754760742, "objective/train/weights_min": 0.36889898777008057, "theoretical_loss": 3.5187339663898873, "tokens_seen": 1631846400 }, { "epoch": 0.49, "learning_rate": 0.0005105922002888782, "loss": 0.0707, "theoretical_loss": 3.5187339663898873, "tokens_seen": 1631846400 }, { "epoch": 0.49, "learning_rate": 0.0005105119563472958, "loss": 0.071, "theoretical_loss": 3.5186853314331974, "tokens_seen": 1632108544 }, { "epoch": 0.49, "learning_rate": 0.0005104317124057134, "loss": 0.0703, "theoretical_loss": 3.5186367064743145, "tokens_seen": 1632370688 }, { "epoch": 0.49, "learning_rate": 0.0005103514684641309, "loss": 0.073, "theoretical_loss": 3.518588091509578, "tokens_seen": 1632632832 }, { "epoch": 0.49, "learning_rate": 0.0005102712245225486, "loss": 0.0695, "theoretical_loss": 3.51853948653533, "tokens_seen": 1632894976 }, { "epoch": 0.49, "learning_rate": 0.0005101909805809661, "loss": 0.0688, "theoretical_loss": 3.5184908915479145, "tokens_seen": 1633157120 }, { "epoch": 0.49, "learning_rate": 0.0005101107366393837, "loss": 0.0696, "theoretical_loss": 3.5184423065436756, "tokens_seen": 1633419264 }, { "epoch": 0.5, "learning_rate": 0.0005100304926978014, "loss": 0.0693, "theoretical_loss": 3.5183937315189615, "tokens_seen": 1633681408 }, { "epoch": 0.5, "learning_rate": 0.0005099502487562189, "loss": 0.068, "theoretical_loss": 3.5183451664701217, "tokens_seen": 1633943552 }, { "epoch": 0.5, "learning_rate": 0.0005098700048146365, "loss": 0.0693, "theoretical_loss": 3.5182966113935072, "tokens_seen": 1634205696 }, { "epoch": 0.5, "learning_rate": 0.0005097897608730541, "loss": 0.0696, "theoretical_loss": 3.518248066285471, "tokens_seen": 1634467840 }, { "epoch": 0.5, "learning_rate": 0.0005097095169314717, "loss": 0.0675, "theoretical_loss": 3.518199531142369, "tokens_seen": 1634729984 }, { "epoch": 0.5, "learning_rate": 0.0005096292729898892, "loss": 0.0704, "theoretical_loss": 3.518151005960557, "tokens_seen": 1634992128 }, { "epoch": 0.5, "learning_rate": 0.0005095490290483069, "loss": 0.0699, "theoretical_loss": 3.5181024907363945, "tokens_seen": 1635254272 }, { "epoch": 0.5, "learning_rate": 0.0005094687851067244, "loss": 0.0691, "theoretical_loss": 3.518053985466243, "tokens_seen": 1635516416 }, { "epoch": 0.5, "learning_rate": 0.000509388541165142, "loss": 0.0662, "theoretical_loss": 3.518005490146464, "tokens_seen": 1635778560 }, { "epoch": 0.5, "learning_rate": 0.0005093082972235597, "loss": 0.0658, "theoretical_loss": 3.5179570047734225, "tokens_seen": 1636040704 }, { "epoch": 0.5, "learning_rate": 0.0005092280532819772, "loss": 0.0732, "theoretical_loss": 3.5179085293434857, "tokens_seen": 1636302848 }, { "epoch": 0.5, "learning_rate": 0.0005091478093403949, "loss": 0.0653, "theoretical_loss": 3.517860063853022, "tokens_seen": 1636564992 }, { "epoch": 0.5, "learning_rate": 0.0005090675653988124, "loss": 0.0694, "theoretical_loss": 3.517811608298401, "tokens_seen": 1636827136 }, { "epoch": 0.5, "learning_rate": 0.00050898732145723, "loss": 0.0706, "theoretical_loss": 3.5177631626759958, "tokens_seen": 1637089280 }, { "epoch": 0.5, "learning_rate": 0.0005089070775156476, "loss": 0.0665, "theoretical_loss": 3.5177147269821805, "tokens_seen": 1637351424 }, { "epoch": 0.5, "learning_rate": 0.0005088268335740651, "loss": 0.0678, "theoretical_loss": 3.517666301213331, "tokens_seen": 1637613568 }, { "epoch": 0.5, "learning_rate": 0.0005087465896324827, "loss": 0.0691, "theoretical_loss": 3.5176178853658246, "tokens_seen": 1637875712 }, { "epoch": 0.5, "learning_rate": 0.0005086663456909003, "loss": 0.0713, "theoretical_loss": 3.5175694794360424, "tokens_seen": 1638137856 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.0005885313730686903, "objective/train/docs_used": 598618, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3161839246749878, "objective/train/original_loss": 1.3161839246749878, "objective/train/theoretical_loss": 3.5175210834203656, "objective/train/tokens_used": 1658860000, "objective/train/value_avg": -0.00928497314453125, "objective/train/value_loss": 0.0003415115352254361, "objective/train/value_max": -7.140636444091797e-05, "objective/train/value_min": -0.94189453125, "objective/train/value_reward_corr": 0.7613534650600375, "objective/train/value_std": 0.0201263427734375, "objective/train/weight_avg": 1.0007390975952148, "objective/train/weighted_lm_loss": 1.3172998428344727, "objective/train/weights_max": 1.4789870977401733, "objective/train/weights_min": 0.3708581030368805, "theoretical_loss": 3.5175210834203656, "tokens_seen": 1638400000 }, { "epoch": 0.5, "learning_rate": 0.000508586101749318, "loss": 0.0704, "theoretical_loss": 3.5175210834203656, "tokens_seen": 1638400000 }, { "epoch": 0.5, "learning_rate": 0.0005085058578077355, "loss": 0.0736, "theoretical_loss": 3.5174726973151778, "tokens_seen": 1638662144 }, { "epoch": 0.5, "learning_rate": 0.0005084256138661532, "loss": 0.0679, "theoretical_loss": 3.517424321116865, "tokens_seen": 1638924288 }, { "epoch": 0.5, "learning_rate": 0.0005083453699245707, "loss": 0.071, "theoretical_loss": 3.517375954821815, "tokens_seen": 1639186432 }, { "epoch": 0.5, "learning_rate": 0.0005082651259829882, "loss": 0.0691, "theoretical_loss": 3.517327598426416, "tokens_seen": 1639448576 }, { "epoch": 0.5, "learning_rate": 0.0005081848820414059, "loss": 0.0685, "theoretical_loss": 3.5172792519270604, "tokens_seen": 1639710720 }, { "epoch": 0.5, "learning_rate": 0.0005081046380998234, "loss": 0.0716, "theoretical_loss": 3.517230915320141, "tokens_seen": 1639972864 }, { "epoch": 0.5, "learning_rate": 0.0005080243941582411, "loss": 0.07, "theoretical_loss": 3.5171825886020525, "tokens_seen": 1640235008 }, { "epoch": 0.5, "learning_rate": 0.0005079441502166586, "loss": 0.0673, "theoretical_loss": 3.5171342717691925, "tokens_seen": 1640497152 }, { "epoch": 0.5, "learning_rate": 0.0005078639062750763, "loss": 0.0675, "theoretical_loss": 3.517085964817959, "tokens_seen": 1640759296 }, { "epoch": 0.5, "learning_rate": 0.0005077836623334939, "loss": 0.0694, "theoretical_loss": 3.517037667744754, "tokens_seen": 1641021440 }, { "epoch": 0.5, "learning_rate": 0.0005077034183919115, "loss": 0.0687, "theoretical_loss": 3.516989380545979, "tokens_seen": 1641283584 }, { "epoch": 0.5, "learning_rate": 0.000507623174450329, "loss": 0.0708, "theoretical_loss": 3.5169411032180387, "tokens_seen": 1641545728 }, { "epoch": 0.5, "learning_rate": 0.0005075429305087466, "loss": 0.0713, "theoretical_loss": 3.51689283575734, "tokens_seen": 1641807872 }, { "epoch": 0.5, "learning_rate": 0.0005074626865671642, "loss": 0.0724, "theoretical_loss": 3.516844578160291, "tokens_seen": 1642070016 }, { "epoch": 0.5, "learning_rate": 0.0005073824426255817, "loss": 0.0684, "theoretical_loss": 3.5167963304233014, "tokens_seen": 1642332160 }, { "epoch": 0.5, "learning_rate": 0.0005073021986839994, "loss": 0.0702, "theoretical_loss": 3.516748092542784, "tokens_seen": 1642594304 }, { "epoch": 0.5, "learning_rate": 0.0005072219547424169, "loss": 0.0718, "theoretical_loss": 3.5166998645151515, "tokens_seen": 1642856448 }, { "epoch": 0.5, "learning_rate": 0.0005071417108008345, "loss": 0.0721, "theoretical_loss": 3.516651646336821, "tokens_seen": 1643118592 }, { "epoch": 0.5, "learning_rate": 0.0005070614668592522, "loss": 0.0703, "theoretical_loss": 3.5166034380042093, "tokens_seen": 1643380736 }, { "epoch": 0.5, "learning_rate": 0.0005069812229176697, "loss": 0.0701, "theoretical_loss": 3.5165552395137363, "tokens_seen": 1643642880 }, { "epoch": 0.5, "learning_rate": 0.0005069009789760874, "loss": 0.0681, "theoretical_loss": 3.516507050861823, "tokens_seen": 1643905024 }, { "epoch": 0.5, "learning_rate": 0.0005068207350345049, "loss": 0.0671, "theoretical_loss": 3.5164588720448937, "tokens_seen": 1644167168 }, { "epoch": 0.5, "learning_rate": 0.0005067404910929225, "loss": 0.0699, "theoretical_loss": 3.5164107030593725, "tokens_seen": 1644429312 }, { "epoch": 0.5, "learning_rate": 0.0005066602471513401, "loss": 0.0707, "theoretical_loss": 3.516362543901687, "tokens_seen": 1644691456 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.001454401994124055, "objective/train/docs_used": 601010, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3577359914779663, "objective/train/original_loss": 1.3577358722686768, "objective/train/theoretical_loss": 3.5163143945682656, "objective/train/tokens_used": 1665413600, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.00015290330338757485, "objective/train/value_max": -6.657838821411133e-05, "objective/train/value_min": -0.483642578125, "objective/train/value_reward_corr": 0.7295783796450883, "objective/train/value_std": 0.01434326171875, "objective/train/weight_avg": 1.0015279054641724, "objective/train/weighted_lm_loss": 1.3596255779266357, "objective/train/weights_max": 1.3309441804885864, "objective/train/weights_min": 0.369488388299942, "theoretical_loss": 3.5163143945682656, "tokens_seen": 1644953600 }, { "epoch": 0.5, "learning_rate": 0.0005065800032097577, "loss": 0.0724, "theoretical_loss": 3.5163143945682656, "tokens_seen": 1644953600 }, { "epoch": 0.5, "learning_rate": 0.0005064997592681752, "loss": 0.0716, "theoretical_loss": 3.5162662550555392, "tokens_seen": 1645215744 }, { "epoch": 0.5, "learning_rate": 0.0005064195153265928, "loss": 0.0721, "theoretical_loss": 3.5162181253599405, "tokens_seen": 1645477888 }, { "epoch": 0.5, "learning_rate": 0.0005063392713850105, "loss": 0.0704, "theoretical_loss": 3.516170005477904, "tokens_seen": 1645740032 }, { "epoch": 0.5, "learning_rate": 0.000506259027443428, "loss": 0.0698, "theoretical_loss": 3.516121895405866, "tokens_seen": 1646002176 }, { "epoch": 0.5, "learning_rate": 0.0005061787835018457, "loss": 0.0733, "theoretical_loss": 3.516073795140265, "tokens_seen": 1646264320 }, { "epoch": 0.5, "learning_rate": 0.0005060985395602632, "loss": 0.0693, "theoretical_loss": 3.5160257046775407, "tokens_seen": 1646526464 }, { "epoch": 0.5, "learning_rate": 0.0005060182956186808, "loss": 0.0695, "theoretical_loss": 3.515977624014135, "tokens_seen": 1646788608 }, { "epoch": 0.5, "learning_rate": 0.0005059380516770984, "loss": 0.0682, "theoretical_loss": 3.515929553146492, "tokens_seen": 1647050752 }, { "epoch": 0.5, "learning_rate": 0.0005058578077355159, "loss": 0.0681, "theoretical_loss": 3.515881492071057, "tokens_seen": 1647312896 }, { "epoch": 0.5, "learning_rate": 0.0005057775637939335, "loss": 0.0705, "theoretical_loss": 3.515833440784278, "tokens_seen": 1647575040 }, { "epoch": 0.5, "learning_rate": 0.0005056973198523511, "loss": 0.0688, "theoretical_loss": 3.5157853992826036, "tokens_seen": 1647837184 }, { "epoch": 0.5, "learning_rate": 0.0005056170759107688, "loss": 0.0697, "theoretical_loss": 3.5157373675624854, "tokens_seen": 1648099328 }, { "epoch": 0.5, "learning_rate": 0.0005055368319691864, "loss": 0.0712, "theoretical_loss": 3.515689345620377, "tokens_seen": 1648361472 }, { "epoch": 0.5, "learning_rate": 0.000505456588027604, "loss": 0.071, "theoretical_loss": 3.5156413334527317, "tokens_seen": 1648623616 }, { "epoch": 0.5, "learning_rate": 0.0005053763440860215, "loss": 0.0698, "theoretical_loss": 3.515593331056008, "tokens_seen": 1648885760 }, { "epoch": 0.5, "learning_rate": 0.0005052961001444391, "loss": 0.068, "theoretical_loss": 3.5155453384266635, "tokens_seen": 1649147904 }, { "epoch": 0.5, "learning_rate": 0.0005052158562028567, "loss": 0.068, "theoretical_loss": 3.515497355561159, "tokens_seen": 1649410048 }, { "epoch": 0.5, "learning_rate": 0.0005051356122612742, "loss": 0.0712, "theoretical_loss": 3.515449382455957, "tokens_seen": 1649672192 }, { "epoch": 0.5, "learning_rate": 0.0005050553683196919, "loss": 0.0699, "theoretical_loss": 3.515401419107521, "tokens_seen": 1649934336 }, { "epoch": 0.5, "learning_rate": 0.0005049751243781094, "loss": 0.0683, "theoretical_loss": 3.515353465512317, "tokens_seen": 1650196480 }, { "epoch": 0.5, "learning_rate": 0.000504894880436527, "loss": 0.0721, "theoretical_loss": 3.5153055216668134, "tokens_seen": 1650458624 }, { "epoch": 0.5, "learning_rate": 0.0005048146364949447, "loss": 0.0704, "theoretical_loss": 3.51525758756748, "tokens_seen": 1650720768 }, { "epoch": 0.5, "learning_rate": 0.0005047343925533623, "loss": 0.0692, "theoretical_loss": 3.5152096632107876, "tokens_seen": 1650982912 }, { "epoch": 0.5, "learning_rate": 0.0005046541486117798, "loss": 0.0704, "theoretical_loss": 3.5151617485932096, "tokens_seen": 1651245056 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.0005821323138661683, "objective/train/docs_used": 603335, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3430393934249878, "objective/train/original_loss": 1.3430393934249878, "objective/train/theoretical_loss": 3.5151138437112213, "objective/train/tokens_used": 1671967200, "objective/train/value_avg": -0.00864410400390625, "objective/train/value_loss": 0.00044420172343961895, "objective/train/value_max": -5.1856040954589844e-05, "objective/train/value_min": -0.92138671875, "objective/train/value_reward_corr": 0.6653913925705341, "objective/train/value_std": 0.01959228515625, "objective/train/weight_avg": 1.0007838010787964, "objective/train/weighted_lm_loss": 1.3428455591201782, "objective/train/weights_max": 2.4011125564575195, "objective/train/weights_min": 0.3695729672908783, "theoretical_loss": 3.5151138437112213, "tokens_seen": 1651507200 }, { "epoch": 0.5, "learning_rate": 0.0005045739046701974, "loss": 0.0665, "theoretical_loss": 3.5151138437112213, "tokens_seen": 1651507200 }, { "epoch": 0.5, "learning_rate": 0.000504493660728615, "loss": 0.0712, "theoretical_loss": 3.5150659485613005, "tokens_seen": 1651769344 }, { "epoch": 0.5, "learning_rate": 0.0005044134167870326, "loss": 0.0693, "theoretical_loss": 3.5150180631399244, "tokens_seen": 1652031488 }, { "epoch": 0.5, "learning_rate": 0.0005043331728454502, "loss": 0.0689, "theoretical_loss": 3.5149701874435753, "tokens_seen": 1652293632 }, { "epoch": 0.5, "learning_rate": 0.0005042529289038677, "loss": 0.0741, "theoretical_loss": 3.514922321468734, "tokens_seen": 1652555776 }, { "epoch": 0.5, "learning_rate": 0.0005041726849622855, "loss": 0.071, "theoretical_loss": 3.5148744652118866, "tokens_seen": 1652817920 }, { "epoch": 0.5, "learning_rate": 0.000504092441020703, "loss": 0.071, "theoretical_loss": 3.5148266186695185, "tokens_seen": 1653080064 }, { "epoch": 0.5, "learning_rate": 0.0005040121970791205, "loss": 0.0687, "theoretical_loss": 3.5147787818381175, "tokens_seen": 1653342208 }, { "epoch": 0.5, "learning_rate": 0.0005039319531375382, "loss": 0.0696, "theoretical_loss": 3.514730954714173, "tokens_seen": 1653604352 }, { "epoch": 0.5, "learning_rate": 0.0005038517091959557, "loss": 0.0693, "theoretical_loss": 3.5146831372941776, "tokens_seen": 1653866496 }, { "epoch": 0.5, "learning_rate": 0.0005037714652543733, "loss": 0.0705, "theoretical_loss": 3.5146353295746247, "tokens_seen": 1654128640 }, { "epoch": 0.5, "learning_rate": 0.0005036912213127909, "loss": 0.0686, "theoretical_loss": 3.514587531552009, "tokens_seen": 1654390784 }, { "epoch": 0.5, "learning_rate": 0.0005036109773712085, "loss": 0.0681, "theoretical_loss": 3.5145397432228274, "tokens_seen": 1654652928 }, { "epoch": 0.5, "learning_rate": 0.000503530733429626, "loss": 0.0723, "theoretical_loss": 3.5144919645835797, "tokens_seen": 1654915072 }, { "epoch": 0.5, "learning_rate": 0.0005034504894880436, "loss": 0.0681, "theoretical_loss": 3.514444195630766, "tokens_seen": 1655177216 }, { "epoch": 0.5, "learning_rate": 0.0005033702455464613, "loss": 0.0683, "theoretical_loss": 3.5143964363608893, "tokens_seen": 1655439360 }, { "epoch": 0.5, "learning_rate": 0.0005032900016048788, "loss": 0.0718, "theoretical_loss": 3.514348686770454, "tokens_seen": 1655701504 }, { "epoch": 0.5, "learning_rate": 0.0005032097576632965, "loss": 0.0683, "theoretical_loss": 3.5143009468559656, "tokens_seen": 1655963648 }, { "epoch": 0.5, "learning_rate": 0.000503129513721714, "loss": 0.0684, "theoretical_loss": 3.514253216613932, "tokens_seen": 1656225792 }, { "epoch": 0.5, "learning_rate": 0.0005030492697801317, "loss": 0.0711, "theoretical_loss": 3.514205496040865, "tokens_seen": 1656487936 }, { "epoch": 0.5, "learning_rate": 0.0005029690258385492, "loss": 0.0692, "theoretical_loss": 3.5141577851332735, "tokens_seen": 1656750080 }, { "epoch": 0.5, "learning_rate": 0.0005028887818969667, "loss": 0.0685, "theoretical_loss": 3.5141100838876724, "tokens_seen": 1657012224 }, { "epoch": 0.5, "learning_rate": 0.0005028085379553844, "loss": 0.0676, "theoretical_loss": 3.5140623923005774, "tokens_seen": 1657274368 }, { "epoch": 0.5, "learning_rate": 0.0005027282940138019, "loss": 0.0675, "theoretical_loss": 3.514014710368505, "tokens_seen": 1657536512 }, { "epoch": 0.5, "learning_rate": 0.0005026480500722196, "loss": 0.0673, "theoretical_loss": 3.513967038087973, "tokens_seen": 1657798656 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.00016826541104819626, "objective/train/docs_used": 605625, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3199695348739624, "objective/train/original_loss": 1.3199697732925415, "objective/train/theoretical_loss": 3.5139193754555036, "objective/train/tokens_used": 1678520800, "objective/train/value_avg": -0.00653076171875, "objective/train/value_loss": 0.0002841904351953417, "objective/train/value_max": -0.00011771917343139648, "objective/train/value_min": -0.70263671875, "objective/train/value_reward_corr": 0.6535262240804112, "objective/train/value_std": 0.01262664794921875, "objective/train/weight_avg": 1.0002937316894531, "objective/train/weighted_lm_loss": 1.3204420804977417, "objective/train/weights_max": 1.227171540260315, "objective/train/weights_min": 0.3689693808555603, "theoretical_loss": 3.5139193754555036, "tokens_seen": 1658060800 }, { "epoch": 0.5, "learning_rate": 0.0005025678061306372, "loss": 0.0688, "theoretical_loss": 3.5139193754555036, "tokens_seen": 1658060800 }, { "epoch": 0.5, "learning_rate": 0.0005024875621890548, "loss": 0.0673, "theoretical_loss": 3.513871722467619, "tokens_seen": 1658322944 }, { "epoch": 0.5, "learning_rate": 0.0005024073182474723, "loss": 0.0701, "theoretical_loss": 3.513824079120843, "tokens_seen": 1658585088 }, { "epoch": 0.5, "learning_rate": 0.0005023270743058899, "loss": 0.0696, "theoretical_loss": 3.513776445411702, "tokens_seen": 1658847232 }, { "epoch": 0.5, "learning_rate": 0.0005022468303643075, "loss": 0.067, "theoretical_loss": 3.5137288213367235, "tokens_seen": 1659109376 }, { "epoch": 0.5, "learning_rate": 0.000502166586422725, "loss": 0.0698, "theoretical_loss": 3.5136812068924375, "tokens_seen": 1659371520 }, { "epoch": 0.5, "learning_rate": 0.0005020863424811427, "loss": 0.069, "theoretical_loss": 3.513633602075376, "tokens_seen": 1659633664 }, { "epoch": 0.5, "learning_rate": 0.0005020060985395602, "loss": 0.0685, "theoretical_loss": 3.513586006882071, "tokens_seen": 1659895808 }, { "epoch": 0.5, "learning_rate": 0.000501925854597978, "loss": 0.0691, "theoretical_loss": 3.513538421309059, "tokens_seen": 1660157952 }, { "epoch": 0.5, "learning_rate": 0.0005018456106563955, "loss": 0.0697, "theoretical_loss": 3.5134908453528757, "tokens_seen": 1660420096 }, { "epoch": 0.5, "learning_rate": 0.000501765366714813, "loss": 0.0685, "theoretical_loss": 3.51344327901006, "tokens_seen": 1660682240 }, { "epoch": 0.5, "learning_rate": 0.0005016851227732307, "loss": 0.0699, "theoretical_loss": 3.513395722277153, "tokens_seen": 1660944384 }, { "epoch": 0.5, "learning_rate": 0.0005016048788316482, "loss": 0.0706, "theoretical_loss": 3.513348175150696, "tokens_seen": 1661206528 }, { "epoch": 0.5, "learning_rate": 0.0005015246348900658, "loss": 0.0658, "theoretical_loss": 3.5133006376272338, "tokens_seen": 1661468672 }, { "epoch": 0.5, "learning_rate": 0.0005014443909484834, "loss": 0.0685, "theoretical_loss": 3.5132531097033115, "tokens_seen": 1661730816 }, { "epoch": 0.5, "learning_rate": 0.000501364147006901, "loss": 0.0655, "theoretical_loss": 3.5132055913754776, "tokens_seen": 1661992960 }, { "epoch": 0.5, "learning_rate": 0.0005012839030653185, "loss": 0.0694, "theoretical_loss": 3.5131580826402806, "tokens_seen": 1662255104 }, { "epoch": 0.5, "learning_rate": 0.0005012036591237363, "loss": 0.0703, "theoretical_loss": 3.5131105834942726, "tokens_seen": 1662517248 }, { "epoch": 0.5, "learning_rate": 0.0005011234151821538, "loss": 0.0698, "theoretical_loss": 3.5130630939340053, "tokens_seen": 1662779392 }, { "epoch": 0.5, "learning_rate": 0.0005010431712405713, "loss": 0.069, "theoretical_loss": 3.5130156139560347, "tokens_seen": 1663041536 }, { "epoch": 0.5, "learning_rate": 0.000500962927298989, "loss": 0.0703, "theoretical_loss": 3.512968143556917, "tokens_seen": 1663303680 }, { "epoch": 0.5, "learning_rate": 0.0005008826833574065, "loss": 0.0693, "theoretical_loss": 3.51292068273321, "tokens_seen": 1663565824 }, { "epoch": 0.5, "learning_rate": 0.0005008024394158241, "loss": 0.0682, "theoretical_loss": 3.512873231481474, "tokens_seen": 1663827968 }, { "epoch": 0.5, "learning_rate": 0.0005007221954742417, "loss": 0.0681, "theoretical_loss": 3.512825789798271, "tokens_seen": 1664090112 }, { "epoch": 0.5, "learning_rate": 0.0005006419515326593, "loss": 0.0683, "theoretical_loss": 3.5127783576801646, "tokens_seen": 1664352256 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.00016097695333883166, "objective/train/docs_used": 608106, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3378410339355469, "objective/train/original_loss": 1.3378409147262573, "objective/train/theoretical_loss": 3.51273093512372, "objective/train/tokens_used": 1685074400, "objective/train/value_avg": -0.00897979736328125, "objective/train/value_loss": 0.00020667578792199492, "objective/train/value_max": -4.6133995056152344e-05, "objective/train/value_min": -0.70166015625, "objective/train/value_reward_corr": 0.7562442994128528, "objective/train/value_std": 0.0157470703125, "objective/train/weight_avg": 1.000257968902588, "objective/train/weighted_lm_loss": 1.3374638557434082, "objective/train/weights_max": 1.2285205125808716, "objective/train/weights_min": 0.39345037937164307, "theoretical_loss": 3.51273093512372, "tokens_seen": 1664614400 }, { "epoch": 0.5, "learning_rate": 0.0005005617075910769, "loss": 0.0709, "theoretical_loss": 3.51273093512372, "tokens_seen": 1664614400 }, { "epoch": 0.5, "learning_rate": 0.0005004814636494944, "loss": 0.0681, "theoretical_loss": 3.512683522125505, "tokens_seen": 1664876544 }, { "epoch": 0.5, "learning_rate": 0.0005004012197079121, "loss": 0.0683, "theoretical_loss": 3.512636118682088, "tokens_seen": 1665138688 }, { "epoch": 0.5, "learning_rate": 0.0005003209757663297, "loss": 0.0687, "theoretical_loss": 3.5125887247900396, "tokens_seen": 1665400832 }, { "epoch": 0.5, "learning_rate": 0.0005002407318247473, "loss": 0.0711, "theoretical_loss": 3.512541340445933, "tokens_seen": 1665662976 }, { "epoch": 0.5, "learning_rate": 0.0005001604878831648, "loss": 0.0695, "theoretical_loss": 3.5124939656463416, "tokens_seen": 1665925120 }, { "epoch": 0.5, "learning_rate": 0.0005000802439415825, "loss": 0.0678, "theoretical_loss": 3.5124466003878423, "tokens_seen": 1666187264 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 0.0694, "theoretical_loss": 3.512399244667012, "tokens_seen": 1666449408 }, { "epoch": 0.51, "learning_rate": 0.0004999197560584176, "loss": 0.0711, "theoretical_loss": 3.5123518984804303, "tokens_seen": 1666711552 }, { "epoch": 0.51, "learning_rate": 0.0004998395121168352, "loss": 0.0727, "theoretical_loss": 3.5123045618246795, "tokens_seen": 1666973696 }, { "epoch": 0.51, "learning_rate": 0.0004997592681752528, "loss": 0.0685, "theoretical_loss": 3.5122572346963423, "tokens_seen": 1667235840 }, { "epoch": 0.51, "learning_rate": 0.0004996790242336704, "loss": 0.0683, "theoretical_loss": 3.512209917092003, "tokens_seen": 1667497984 }, { "epoch": 0.51, "learning_rate": 0.000499598780292088, "loss": 0.0699, "theoretical_loss": 3.5121626090082487, "tokens_seen": 1667760128 }, { "epoch": 0.51, "learning_rate": 0.0004995185363505056, "loss": 0.0694, "theoretical_loss": 3.5121153104416676, "tokens_seen": 1668022272 }, { "epoch": 0.51, "learning_rate": 0.0004994382924089231, "loss": 0.0653, "theoretical_loss": 3.5120680213888504, "tokens_seen": 1668284416 }, { "epoch": 0.51, "learning_rate": 0.0004993580484673407, "loss": 0.0686, "theoretical_loss": 3.512020741846388, "tokens_seen": 1668546560 }, { "epoch": 0.51, "learning_rate": 0.0004992778045257583, "loss": 0.0676, "theoretical_loss": 3.511973471810875, "tokens_seen": 1668808704 }, { "epoch": 0.51, "learning_rate": 0.0004991975605841759, "loss": 0.069, "theoretical_loss": 3.5119262112789063, "tokens_seen": 1669070848 }, { "epoch": 0.51, "learning_rate": 0.0004991173166425935, "loss": 0.0679, "theoretical_loss": 3.5118789602470786, "tokens_seen": 1669332992 }, { "epoch": 0.51, "learning_rate": 0.000499037072701011, "loss": 0.0727, "theoretical_loss": 3.5118317187119916, "tokens_seen": 1669595136 }, { "epoch": 0.51, "learning_rate": 0.0004989568287594286, "loss": 0.07, "theoretical_loss": 3.511784486670246, "tokens_seen": 1669857280 }, { "epoch": 0.51, "learning_rate": 0.0004988765848178463, "loss": 0.0673, "theoretical_loss": 3.5117372641184432, "tokens_seen": 1670119424 }, { "epoch": 0.51, "learning_rate": 0.0004987963408762639, "loss": 0.0707, "theoretical_loss": 3.5116900510531885, "tokens_seen": 1670381568 }, { "epoch": 0.51, "learning_rate": 0.0004987160969346815, "loss": 0.069, "theoretical_loss": 3.5116428474710872, "tokens_seen": 1670643712 }, { "epoch": 0.51, "learning_rate": 0.0004986358529930991, "loss": 0.0706, "theoretical_loss": 3.5115956533687473, "tokens_seen": 1670905856 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.0005539392586797476, "objective/train/docs_used": 610761, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.340118169784546, "objective/train/original_loss": 1.3401179313659668, "objective/train/theoretical_loss": 3.5115484687427783, "objective/train/tokens_used": 1691628000, "objective/train/value_avg": -0.0100250244140625, "objective/train/value_loss": 0.00027908929041586816, "objective/train/value_max": -8.028745651245117e-05, "objective/train/value_min": -0.5888671875, "objective/train/value_reward_corr": 0.7176989029137323, "objective/train/value_std": 0.01812744140625, "objective/train/weight_avg": 1.0006839036941528, "objective/train/weighted_lm_loss": 1.3407360315322876, "objective/train/weights_max": 1.7151225805282593, "objective/train/weights_min": 0.3900670111179352, "theoretical_loss": 3.5115484687427783, "tokens_seen": 1671168000 }, { "epoch": 0.51, "learning_rate": 0.0004985556090515166, "loss": 0.0734, "theoretical_loss": 3.5115484687427783, "tokens_seen": 1671168000 }, { "epoch": 0.51, "learning_rate": 0.0004984753651099342, "loss": 0.0652, "theoretical_loss": 3.5115012935897907, "tokens_seen": 1671430144 }, { "epoch": 0.51, "learning_rate": 0.0004983951211683518, "loss": 0.0676, "theoretical_loss": 3.5114541279063975, "tokens_seen": 1671692288 }, { "epoch": 0.51, "learning_rate": 0.0004983148772267693, "loss": 0.0692, "theoretical_loss": 3.511406971689214, "tokens_seen": 1671954432 }, { "epoch": 0.51, "learning_rate": 0.0004982346332851869, "loss": 0.0673, "theoretical_loss": 3.511359824934856, "tokens_seen": 1672216576 }, { "epoch": 0.51, "learning_rate": 0.0004981543893436046, "loss": 0.0666, "theoretical_loss": 3.5113126876399416, "tokens_seen": 1672478720 }, { "epoch": 0.51, "learning_rate": 0.0004980741454020222, "loss": 0.0677, "theoretical_loss": 3.511265559801091, "tokens_seen": 1672740864 }, { "epoch": 0.51, "learning_rate": 0.0004979939014604398, "loss": 0.069, "theoretical_loss": 3.5112184414149255, "tokens_seen": 1673003008 }, { "epoch": 0.51, "learning_rate": 0.0004979136575188573, "loss": 0.0681, "theoretical_loss": 3.5111713324780687, "tokens_seen": 1673265152 }, { "epoch": 0.51, "learning_rate": 0.0004978334135772749, "loss": 0.0702, "theoretical_loss": 3.5111242329871457, "tokens_seen": 1673527296 }, { "epoch": 0.51, "learning_rate": 0.0004977531696356925, "loss": 0.0689, "theoretical_loss": 3.5110771429387824, "tokens_seen": 1673789440 }, { "epoch": 0.51, "learning_rate": 0.0004976729256941101, "loss": 0.0688, "theoretical_loss": 3.511030062329608, "tokens_seen": 1674051584 }, { "epoch": 0.51, "learning_rate": 0.0004975926817525277, "loss": 0.0691, "theoretical_loss": 3.5109829911562533, "tokens_seen": 1674313728 }, { "epoch": 0.51, "learning_rate": 0.0004975124378109452, "loss": 0.0661, "theoretical_loss": 3.5109359294153495, "tokens_seen": 1674575872 }, { "epoch": 0.51, "learning_rate": 0.0004974321938693629, "loss": 0.0678, "theoretical_loss": 3.5108888771035307, "tokens_seen": 1674838016 }, { "epoch": 0.51, "learning_rate": 0.0004973519499277805, "loss": 0.0681, "theoretical_loss": 3.5108418342174317, "tokens_seen": 1675100160 }, { "epoch": 0.51, "learning_rate": 0.0004972717059861981, "loss": 0.0678, "theoretical_loss": 3.5107948007536907, "tokens_seen": 1675362304 }, { "epoch": 0.51, "learning_rate": 0.0004971914620446156, "loss": 0.0673, "theoretical_loss": 3.5107477767089454, "tokens_seen": 1675624448 }, { "epoch": 0.51, "learning_rate": 0.0004971112181030332, "loss": 0.0663, "theoretical_loss": 3.5107007620798374, "tokens_seen": 1675886592 }, { "epoch": 0.51, "learning_rate": 0.0004970309741614508, "loss": 0.0682, "theoretical_loss": 3.510653756863009, "tokens_seen": 1676148736 }, { "epoch": 0.51, "learning_rate": 0.0004969507302198684, "loss": 0.0676, "theoretical_loss": 3.5106067610551035, "tokens_seen": 1676410880 }, { "epoch": 0.51, "learning_rate": 0.000496870486278286, "loss": 0.071, "theoretical_loss": 3.5105597746527675, "tokens_seen": 1676673024 }, { "epoch": 0.51, "learning_rate": 0.0004967902423367036, "loss": 0.0657, "theoretical_loss": 3.5105127976526482, "tokens_seen": 1676935168 }, { "epoch": 0.51, "learning_rate": 0.0004967099983951212, "loss": 0.0679, "theoretical_loss": 3.5104658300513942, "tokens_seen": 1677197312 }, { "epoch": 0.51, "learning_rate": 0.0004966297544535388, "loss": 0.0655, "theoretical_loss": 3.5104188718456575, "tokens_seen": 1677459456 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.00019064874504692852, "objective/train/docs_used": 612648, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3213294744491577, "objective/train/original_loss": 1.3213295936584473, "objective/train/theoretical_loss": 3.5103719230320904, "objective/train/tokens_used": 1698181600, "objective/train/value_avg": -0.0089111328125, "objective/train/value_loss": 0.000392118381569162, "objective/train/value_max": -7.367134094238281e-05, "objective/train/value_min": -0.72900390625, "objective/train/value_reward_corr": 0.6955816812324872, "objective/train/value_std": 0.0175933837890625, "objective/train/weight_avg": 1.0003646612167358, "objective/train/weighted_lm_loss": 1.3213082551956177, "objective/train/weights_max": 1.415054440498352, "objective/train/weights_min": 0.3721874952316284, "theoretical_loss": 3.5103719230320904, "tokens_seen": 1677721600 }, { "epoch": 0.51, "learning_rate": 0.0004965495105119564, "loss": 0.0681, "theoretical_loss": 3.5103719230320904, "tokens_seen": 1677721600 }, { "epoch": 0.51, "learning_rate": 0.000496469266570374, "loss": 0.069, "theoretical_loss": 3.510324983607347, "tokens_seen": 1677983744 }, { "epoch": 0.51, "learning_rate": 0.0004963890226287915, "loss": 0.0682, "theoretical_loss": 3.5102780535680838, "tokens_seen": 1678245888 }, { "epoch": 0.51, "learning_rate": 0.0004963087786872091, "loss": 0.0701, "theoretical_loss": 3.510231132910958, "tokens_seen": 1678508032 }, { "epoch": 0.51, "learning_rate": 0.0004962285347456267, "loss": 0.0679, "theoretical_loss": 3.5101842216326293, "tokens_seen": 1678770176 }, { "epoch": 0.51, "learning_rate": 0.0004961482908040443, "loss": 0.0674, "theoretical_loss": 3.5101373197297594, "tokens_seen": 1679032320 }, { "epoch": 0.51, "learning_rate": 0.0004960680468624618, "loss": 0.0672, "theoretical_loss": 3.510090427199011, "tokens_seen": 1679294464 }, { "epoch": 0.51, "learning_rate": 0.0004959878029208794, "loss": 0.0689, "theoretical_loss": 3.5100435440370483, "tokens_seen": 1679556608 }, { "epoch": 0.51, "learning_rate": 0.0004959075589792971, "loss": 0.0684, "theoretical_loss": 3.5099966702405383, "tokens_seen": 1679818752 }, { "epoch": 0.51, "learning_rate": 0.0004958273150377147, "loss": 0.0712, "theoretical_loss": 3.5099498058061487, "tokens_seen": 1680080896 }, { "epoch": 0.51, "learning_rate": 0.0004957470710961323, "loss": 0.068, "theoretical_loss": 3.5099029507305497, "tokens_seen": 1680343040 }, { "epoch": 0.51, "learning_rate": 0.0004956668271545499, "loss": 0.0703, "theoretical_loss": 3.509856105010412, "tokens_seen": 1680605184 }, { "epoch": 0.51, "learning_rate": 0.0004955865832129674, "loss": 0.073, "theoretical_loss": 3.509809268642409, "tokens_seen": 1680867328 }, { "epoch": 0.51, "learning_rate": 0.000495506339271385, "loss": 0.071, "theoretical_loss": 3.5097624416232156, "tokens_seen": 1681129472 }, { "epoch": 0.51, "learning_rate": 0.0004954260953298026, "loss": 0.0702, "theoretical_loss": 3.509715623949509, "tokens_seen": 1681391616 }, { "epoch": 0.51, "learning_rate": 0.0004953458513882202, "loss": 0.0712, "theoretical_loss": 3.509668815617967, "tokens_seen": 1681653760 }, { "epoch": 0.51, "learning_rate": 0.0004952656074466377, "loss": 0.0692, "theoretical_loss": 3.5096220166252694, "tokens_seen": 1681915904 }, { "epoch": 0.51, "learning_rate": 0.0004951853635050554, "loss": 0.0683, "theoretical_loss": 3.509575226968098, "tokens_seen": 1682178048 }, { "epoch": 0.51, "learning_rate": 0.000495105119563473, "loss": 0.0691, "theoretical_loss": 3.5095284466431362, "tokens_seen": 1682440192 }, { "epoch": 0.51, "learning_rate": 0.0004950248756218906, "loss": 0.068, "theoretical_loss": 3.509481675647069, "tokens_seen": 1682702336 }, { "epoch": 0.51, "learning_rate": 0.0004949446316803081, "loss": 0.0684, "theoretical_loss": 3.509434913976583, "tokens_seen": 1682964480 }, { "epoch": 0.51, "learning_rate": 0.0004948643877387257, "loss": 0.0694, "theoretical_loss": 3.509388161628367, "tokens_seen": 1683226624 }, { "epoch": 0.51, "learning_rate": 0.0004947841437971433, "loss": 0.0698, "theoretical_loss": 3.509341418599111, "tokens_seen": 1683488768 }, { "epoch": 0.51, "learning_rate": 0.0004947038998555609, "loss": 0.0695, "theoretical_loss": 3.509294684885506, "tokens_seen": 1683750912 }, { "epoch": 0.51, "learning_rate": 0.0004946236559139785, "loss": 0.0712, "theoretical_loss": 3.509247960484247, "tokens_seen": 1684013056 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.000966555206105113, "objective/train/docs_used": 615159, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3198797702789307, "objective/train/original_loss": 1.3198797702789307, "objective/train/theoretical_loss": 3.5092012453920285, "objective/train/tokens_used": 1704735200, "objective/train/value_avg": -0.00679779052734375, "objective/train/value_loss": 0.00012040092406095937, "objective/train/value_max": -6.300210952758789e-05, "objective/train/value_min": -0.474609375, "objective/train/value_reward_corr": 0.700724774795992, "objective/train/value_std": 0.0108489990234375, "objective/train/weight_avg": 1.0010249614715576, "objective/train/weighted_lm_loss": 1.3210926055908203, "objective/train/weights_max": 1.2091786861419678, "objective/train/weights_min": 0.6098971962928772, "theoretical_loss": 3.5092012453920285, "tokens_seen": 1684275200 }, { "epoch": 0.51, "learning_rate": 0.0004945434119723962, "loss": 0.0686, "theoretical_loss": 3.5092012453920285, "tokens_seen": 1684275200 }, { "epoch": 0.51, "learning_rate": 0.0004944631680308137, "loss": 0.0685, "theoretical_loss": 3.5091545396055475, "tokens_seen": 1684537344 }, { "epoch": 0.51, "learning_rate": 0.0004943829240892313, "loss": 0.0674, "theoretical_loss": 3.5091078431215017, "tokens_seen": 1684799488 }, { "epoch": 0.51, "learning_rate": 0.0004943026801476489, "loss": 0.0674, "theoretical_loss": 3.5090611559365925, "tokens_seen": 1685061632 }, { "epoch": 0.51, "learning_rate": 0.0004942224362060665, "loss": 0.0685, "theoretical_loss": 3.509014478047522, "tokens_seen": 1685323776 }, { "epoch": 0.51, "learning_rate": 0.000494142192264484, "loss": 0.0709, "theoretical_loss": 3.5089678094509926, "tokens_seen": 1685585920 }, { "epoch": 0.51, "learning_rate": 0.0004940619483229016, "loss": 0.0691, "theoretical_loss": 3.5089211501437103, "tokens_seen": 1685848064 }, { "epoch": 0.51, "learning_rate": 0.0004939817043813192, "loss": 0.0684, "theoretical_loss": 3.5088745001223822, "tokens_seen": 1686110208 }, { "epoch": 0.51, "learning_rate": 0.0004939014604397368, "loss": 0.0698, "theoretical_loss": 3.508827859383717, "tokens_seen": 1686372352 }, { "epoch": 0.51, "learning_rate": 0.0004938212164981544, "loss": 0.0674, "theoretical_loss": 3.5087812279244246, "tokens_seen": 1686634496 }, { "epoch": 0.51, "learning_rate": 0.000493740972556572, "loss": 0.0691, "theoretical_loss": 3.5087346057412176, "tokens_seen": 1686896640 }, { "epoch": 0.51, "learning_rate": 0.0004936607286149896, "loss": 0.0734, "theoretical_loss": 3.508687992830809, "tokens_seen": 1687158784 }, { "epoch": 0.51, "learning_rate": 0.0004935804846734072, "loss": 0.0731, "theoretical_loss": 3.5086413891899144, "tokens_seen": 1687420928 }, { "epoch": 0.51, "learning_rate": 0.0004935002407318248, "loss": 0.0714, "theoretical_loss": 3.5085947948152514, "tokens_seen": 1687683072 }, { "epoch": 0.51, "learning_rate": 0.0004934199967902424, "loss": 0.0688, "theoretical_loss": 3.5085482097035383, "tokens_seen": 1687945216 }, { "epoch": 0.51, "learning_rate": 0.0004933397528486599, "loss": 0.0693, "theoretical_loss": 3.508501633851495, "tokens_seen": 1688207360 }, { "epoch": 0.51, "learning_rate": 0.0004932595089070775, "loss": 0.0672, "theoretical_loss": 3.5084550672558446, "tokens_seen": 1688469504 }, { "epoch": 0.51, "learning_rate": 0.0004931792649654951, "loss": 0.0694, "theoretical_loss": 3.50840850991331, "tokens_seen": 1688731648 }, { "epoch": 0.51, "learning_rate": 0.0004930990210239126, "loss": 0.0655, "theoretical_loss": 3.5083619618206168, "tokens_seen": 1688993792 }, { "epoch": 0.51, "learning_rate": 0.0004930187770823302, "loss": 0.0664, "theoretical_loss": 3.5083154229744924, "tokens_seen": 1689255936 }, { "epoch": 0.51, "learning_rate": 0.0004929385331407479, "loss": 0.0735, "theoretical_loss": 3.5082688933716653, "tokens_seen": 1689518080 }, { "epoch": 0.51, "learning_rate": 0.0004928582891991655, "loss": 0.0701, "theoretical_loss": 3.5082223730088655, "tokens_seen": 1689780224 }, { "epoch": 0.51, "learning_rate": 0.0004927780452575831, "loss": 0.0695, "theoretical_loss": 3.5081758618828256, "tokens_seen": 1690042368 }, { "epoch": 0.51, "learning_rate": 0.0004926978013160007, "loss": 0.0682, "theoretical_loss": 3.5081293599902788, "tokens_seen": 1690304512 }, { "epoch": 0.51, "learning_rate": 0.0004926175573744183, "loss": 0.0693, "theoretical_loss": 3.5080828673279614, "tokens_seen": 1690566656 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.000561828725039959, "objective/train/docs_used": 617433, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3782442808151245, "objective/train/original_loss": 1.3782442808151245, "objective/train/theoretical_loss": 3.5080363838926094, "objective/train/tokens_used": 1711288800, "objective/train/value_avg": -0.006938934326171875, "objective/train/value_loss": 0.00027447956381365657, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.72607421875, "objective/train/value_reward_corr": 0.6786282922023184, "objective/train/value_std": 0.016754150390625, "objective/train/weight_avg": 1.0006831884384155, "objective/train/weighted_lm_loss": 1.3784576654434204, "objective/train/weights_max": 1.5841267108917236, "objective/train/weights_min": 0.3688187897205353, "theoretical_loss": 3.5080363838926094, "tokens_seen": 1690828800 }, { "epoch": 0.51, "learning_rate": 0.0004925373134328358, "loss": 0.0686, "theoretical_loss": 3.5080363838926094, "tokens_seen": 1690828800 }, { "epoch": 0.51, "learning_rate": 0.0004924570694912534, "loss": 0.0688, "theoretical_loss": 3.5079899096809624, "tokens_seen": 1691090944 }, { "epoch": 0.51, "learning_rate": 0.000492376825549671, "loss": 0.0691, "theoretical_loss": 3.5079434446897597, "tokens_seen": 1691353088 }, { "epoch": 0.51, "learning_rate": 0.0004922965816080887, "loss": 0.0695, "theoretical_loss": 3.5078969889157445, "tokens_seen": 1691615232 }, { "epoch": 0.51, "learning_rate": 0.0004922163376665062, "loss": 0.0685, "theoretical_loss": 3.5078505423556594, "tokens_seen": 1691877376 }, { "epoch": 0.51, "learning_rate": 0.0004921360937249238, "loss": 0.0708, "theoretical_loss": 3.50780410500625, "tokens_seen": 1692139520 }, { "epoch": 0.51, "learning_rate": 0.0004920558497833414, "loss": 0.0707, "theoretical_loss": 3.507757676864264, "tokens_seen": 1692401664 }, { "epoch": 0.51, "learning_rate": 0.0004919756058417589, "loss": 0.067, "theoretical_loss": 3.5077112579264496, "tokens_seen": 1692663808 }, { "epoch": 0.51, "learning_rate": 0.0004918953619001765, "loss": 0.0693, "theoretical_loss": 3.507664848189557, "tokens_seen": 1692925952 }, { "epoch": 0.51, "learning_rate": 0.0004918151179585941, "loss": 0.0715, "theoretical_loss": 3.507618447650337, "tokens_seen": 1693188096 }, { "epoch": 0.51, "learning_rate": 0.0004917348740170117, "loss": 0.0696, "theoretical_loss": 3.5075720563055457, "tokens_seen": 1693450240 }, { "epoch": 0.51, "learning_rate": 0.0004916546300754293, "loss": 0.0667, "theoretical_loss": 3.5075256741519363, "tokens_seen": 1693712384 }, { "epoch": 0.51, "learning_rate": 0.000491574386133847, "loss": 0.0667, "theoretical_loss": 3.507479301186266, "tokens_seen": 1693974528 }, { "epoch": 0.51, "learning_rate": 0.0004914941421922646, "loss": 0.0708, "theoretical_loss": 3.5074329374052944, "tokens_seen": 1694236672 }, { "epoch": 0.51, "learning_rate": 0.0004914138982506821, "loss": 0.0662, "theoretical_loss": 3.507386582805781, "tokens_seen": 1694498816 }, { "epoch": 0.51, "learning_rate": 0.0004913336543090997, "loss": 0.0681, "theoretical_loss": 3.5073402373844864, "tokens_seen": 1694760960 }, { "epoch": 0.51, "learning_rate": 0.0004912534103675173, "loss": 0.0695, "theoretical_loss": 3.5072939011381763, "tokens_seen": 1695023104 }, { "epoch": 0.51, "learning_rate": 0.0004911731664259348, "loss": 0.0672, "theoretical_loss": 3.507247574063614, "tokens_seen": 1695285248 }, { "epoch": 0.51, "learning_rate": 0.0004910929224843524, "loss": 0.0664, "theoretical_loss": 3.5072012561575674, "tokens_seen": 1695547392 }, { "epoch": 0.51, "learning_rate": 0.00049101267854277, "loss": 0.0675, "theoretical_loss": 3.5071549474168036, "tokens_seen": 1695809536 }, { "epoch": 0.51, "learning_rate": 0.0004909324346011876, "loss": 0.0699, "theoretical_loss": 3.507108647838094, "tokens_seen": 1696071680 }, { "epoch": 0.51, "learning_rate": 0.0004908521906596052, "loss": 0.0671, "theoretical_loss": 3.507062357418209, "tokens_seen": 1696333824 }, { "epoch": 0.51, "learning_rate": 0.0004907719467180228, "loss": 0.0681, "theoretical_loss": 3.5070160761539233, "tokens_seen": 1696595968 }, { "epoch": 0.51, "learning_rate": 0.0004906917027764404, "loss": 0.0682, "theoretical_loss": 3.506969804042011, "tokens_seen": 1696858112 }, { "epoch": 0.51, "learning_rate": 0.000490611458834858, "loss": 0.0707, "theoretical_loss": 3.5069235410792485, "tokens_seen": 1697120256 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.00037086568772792816, "objective/train/docs_used": 619912, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4011075496673584, "objective/train/original_loss": 1.4011075496673584, "objective/train/theoretical_loss": 3.506877287262414, "objective/train/tokens_used": 1717842400, "objective/train/value_avg": -0.00641632080078125, "objective/train/value_loss": 0.00015510381490457803, "objective/train/value_max": -5.7816505432128906e-05, "objective/train/value_min": -0.32763671875, "objective/train/value_reward_corr": 0.6610992539997755, "objective/train/value_std": 0.01039886474609375, "objective/train/weight_avg": 1.000441551208496, "objective/train/weighted_lm_loss": 1.4019138813018799, "objective/train/weights_max": 1.1068991422653198, "objective/train/weights_min": 0.36844298243522644, "theoretical_loss": 3.506877287262414, "tokens_seen": 1697382400 }, { "epoch": 0.51, "learning_rate": 0.0004905312148932756, "loss": 0.0692, "theoretical_loss": 3.506877287262414, "tokens_seen": 1697382400 }, { "epoch": 0.51, "learning_rate": 0.0004904509709516932, "loss": 0.0671, "theoretical_loss": 3.5068310425882876, "tokens_seen": 1697644544 }, { "epoch": 0.51, "learning_rate": 0.0004903707270101108, "loss": 0.0696, "theoretical_loss": 3.506784807053651, "tokens_seen": 1697906688 }, { "epoch": 0.51, "learning_rate": 0.0004902904830685283, "loss": 0.0704, "theoretical_loss": 3.506738580655287, "tokens_seen": 1698168832 }, { "epoch": 0.51, "learning_rate": 0.0004902102391269459, "loss": 0.0673, "theoretical_loss": 3.5066923633899796, "tokens_seen": 1698430976 }, { "epoch": 0.51, "learning_rate": 0.0004901299951853635, "loss": 0.0683, "theoretical_loss": 3.506646155254516, "tokens_seen": 1698693120 }, { "epoch": 0.51, "learning_rate": 0.000490049751243781, "loss": 0.0686, "theoretical_loss": 3.506599956245684, "tokens_seen": 1698955264 }, { "epoch": 0.51, "learning_rate": 0.0004899695073021987, "loss": 0.0689, "theoretical_loss": 3.5065537663602737, "tokens_seen": 1699217408 }, { "epoch": 0.52, "learning_rate": 0.0004898892633606163, "loss": 0.0678, "theoretical_loss": 3.506507585595075, "tokens_seen": 1699479552 }, { "epoch": 0.52, "learning_rate": 0.0004898090194190339, "loss": 0.0681, "theoretical_loss": 3.506461413946882, "tokens_seen": 1699741696 }, { "epoch": 0.52, "learning_rate": 0.0004897287754774515, "loss": 0.0653, "theoretical_loss": 3.5064152514124887, "tokens_seen": 1700003840 }, { "epoch": 0.52, "learning_rate": 0.0004896485315358691, "loss": 0.0701, "theoretical_loss": 3.506369097988691, "tokens_seen": 1700265984 }, { "epoch": 0.52, "learning_rate": 0.0004895682875942866, "loss": 0.0678, "theoretical_loss": 3.5063229536722864, "tokens_seen": 1700528128 }, { "epoch": 0.52, "learning_rate": 0.0004894880436527042, "loss": 0.0688, "theoretical_loss": 3.5062768184600754, "tokens_seen": 1700790272 }, { "epoch": 0.52, "learning_rate": 0.0004894077997111218, "loss": 0.0674, "theoretical_loss": 3.5062306923488573, "tokens_seen": 1701052416 }, { "epoch": 0.52, "learning_rate": 0.0004893275557695395, "loss": 0.0697, "theoretical_loss": 3.5061845753354355, "tokens_seen": 1701314560 }, { "epoch": 0.52, "learning_rate": 0.000489247311827957, "loss": 0.0673, "theoretical_loss": 3.5061384674166147, "tokens_seen": 1701576704 }, { "epoch": 0.52, "learning_rate": 0.0004891670678863746, "loss": 0.0663, "theoretical_loss": 3.5060923685892, "tokens_seen": 1701838848 }, { "epoch": 0.52, "learning_rate": 0.0004890868239447922, "loss": 0.0714, "theoretical_loss": 3.5060462788499986, "tokens_seen": 1702100992 }, { "epoch": 0.52, "learning_rate": 0.0004890065800032098, "loss": 0.0684, "theoretical_loss": 3.50600019819582, "tokens_seen": 1702363136 }, { "epoch": 0.52, "learning_rate": 0.0004889263360616273, "loss": 0.0679, "theoretical_loss": 3.5059541266234744, "tokens_seen": 1702625280 }, { "epoch": 0.52, "learning_rate": 0.0004888460921200449, "loss": 0.0688, "theoretical_loss": 3.505908064129775, "tokens_seen": 1702887424 }, { "epoch": 0.52, "learning_rate": 0.0004887658481784625, "loss": 0.0655, "theoretical_loss": 3.5058620107115344, "tokens_seen": 1703149568 }, { "epoch": 0.52, "learning_rate": 0.0004886856042368801, "loss": 0.068, "theoretical_loss": 3.505815966365568, "tokens_seen": 1703411712 }, { "epoch": 0.52, "learning_rate": 0.0004886053602952977, "loss": 0.0701, "theoretical_loss": 3.5057699310886945, "tokens_seen": 1703673856 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.0003058328293263912, "objective/train/docs_used": 622235, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3641868829727173, "objective/train/original_loss": 1.3641867637634277, "objective/train/theoretical_loss": 3.505723904877731, "objective/train/tokens_used": 1724396000, "objective/train/value_avg": -0.00513458251953125, "objective/train/value_loss": 0.00021216008462943137, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.95166015625, "objective/train/value_reward_corr": 0.6547522900747396, "objective/train/value_std": 0.0124664306640625, "objective/train/weight_avg": 1.0004013776779175, "objective/train/weighted_lm_loss": 1.3642295598983765, "objective/train/weights_max": 2.1640307903289795, "objective/train/weights_min": 0.3709288537502289, "theoretical_loss": 3.505723904877731, "tokens_seen": 1703936000 }, { "epoch": 0.52, "learning_rate": 0.0004885251163537154, "loss": 0.0687, "theoretical_loss": 3.505723904877731, "tokens_seen": 1703936000 }, { "epoch": 0.52, "learning_rate": 0.0004884448724121329, "loss": 0.0668, "theoretical_loss": 3.5056778877294983, "tokens_seen": 1704198144 }, { "epoch": 0.52, "learning_rate": 0.0004883646284705505, "loss": 0.0681, "theoretical_loss": 3.5056318796408186, "tokens_seen": 1704460288 }, { "epoch": 0.52, "learning_rate": 0.0004882843845289681, "loss": 0.0684, "theoretical_loss": 3.505585880608515, "tokens_seen": 1704722432 }, { "epoch": 0.52, "learning_rate": 0.00048820414058738565, "loss": 0.0682, "theoretical_loss": 3.505539890629412, "tokens_seen": 1704984576 }, { "epoch": 0.52, "learning_rate": 0.00048812389664580326, "loss": 0.0683, "theoretical_loss": 3.5054939097003377, "tokens_seen": 1705246720 }, { "epoch": 0.52, "learning_rate": 0.0004880436527042209, "loss": 0.0694, "theoretical_loss": 3.505447937818119, "tokens_seen": 1705508864 }, { "epoch": 0.52, "learning_rate": 0.00048796340876263843, "loss": 0.0686, "theoretical_loss": 3.5054019749795864, "tokens_seen": 1705771008 }, { "epoch": 0.52, "learning_rate": 0.00048788316482105605, "loss": 0.071, "theoretical_loss": 3.5053560211815715, "tokens_seen": 1706033152 }, { "epoch": 0.52, "learning_rate": 0.0004878029208794736, "loss": 0.0683, "theoretical_loss": 3.505310076420907, "tokens_seen": 1706295296 }, { "epoch": 0.52, "learning_rate": 0.00048772267693789116, "loss": 0.0672, "theoretical_loss": 3.505264140694428, "tokens_seen": 1706557440 }, { "epoch": 0.52, "learning_rate": 0.0004876424329963088, "loss": 0.0696, "theoretical_loss": 3.5052182139989707, "tokens_seen": 1706819584 }, { "epoch": 0.52, "learning_rate": 0.0004875621890547264, "loss": 0.0698, "theoretical_loss": 3.5051722963313723, "tokens_seen": 1707081728 }, { "epoch": 0.52, "learning_rate": 0.000487481945113144, "loss": 0.0671, "theoretical_loss": 3.505126387688473, "tokens_seen": 1707343872 }, { "epoch": 0.52, "learning_rate": 0.00048740170117156156, "loss": 0.0654, "theoretical_loss": 3.5050804880671134, "tokens_seen": 1707606016 }, { "epoch": 0.52, "learning_rate": 0.00048732145722997917, "loss": 0.0677, "theoretical_loss": 3.505034597464137, "tokens_seen": 1707868160 }, { "epoch": 0.52, "learning_rate": 0.00048724121328839673, "loss": 0.0691, "theoretical_loss": 3.5049887158763866, "tokens_seen": 1708130304 }, { "epoch": 0.52, "learning_rate": 0.0004871609693468143, "loss": 0.0658, "theoretical_loss": 3.504942843300709, "tokens_seen": 1708392448 }, { "epoch": 0.52, "learning_rate": 0.0004870807254052319, "loss": 0.0683, "theoretical_loss": 3.5048969797339513, "tokens_seen": 1708654592 }, { "epoch": 0.52, "learning_rate": 0.0004870004814636495, "loss": 0.0703, "theoretical_loss": 3.5048511251729626, "tokens_seen": 1708916736 }, { "epoch": 0.52, "learning_rate": 0.0004869202375220671, "loss": 0.068, "theoretical_loss": 3.504805279614594, "tokens_seen": 1709178880 }, { "epoch": 0.52, "learning_rate": 0.0004868399935804847, "loss": 0.0704, "theoretical_loss": 3.504759443055696, "tokens_seen": 1709441024 }, { "epoch": 0.52, "learning_rate": 0.0004867597496389023, "loss": 0.0679, "theoretical_loss": 3.5047136154931238, "tokens_seen": 1709703168 }, { "epoch": 0.52, "learning_rate": 0.00048667950569731986, "loss": 0.0696, "theoretical_loss": 3.5046677969237328, "tokens_seen": 1709965312 }, { "epoch": 0.52, "learning_rate": 0.0004865992617557374, "loss": 0.0679, "theoretical_loss": 3.504621987344379, "tokens_seen": 1710227456 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.0006008681957609951, "objective/train/docs_used": 624643, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3755950927734375, "objective/train/original_loss": 1.3755950927734375, "objective/train/theoretical_loss": 3.504576186751921, "objective/train/tokens_used": 1730949600, "objective/train/value_avg": -0.008392333984375, "objective/train/value_loss": 0.00020491065515670925, "objective/train/value_max": -4.267692565917969e-05, "objective/train/value_min": -0.37646484375, "objective/train/value_reward_corr": 0.7498691017721519, "objective/train/value_std": 0.0162200927734375, "objective/train/weight_avg": 1.0006961822509766, "objective/train/weighted_lm_loss": 1.3765302896499634, "objective/train/weights_max": 1.2192577123641968, "objective/train/weights_min": 0.37706345319747925, "theoretical_loss": 3.504576186751921, "tokens_seen": 1710489600 }, { "epoch": 0.52, "learning_rate": 0.000486519017814155, "loss": 0.0693, "theoretical_loss": 3.504576186751921, "tokens_seen": 1710489600 }, { "epoch": 0.52, "learning_rate": 0.00048643877387257264, "loss": 0.0698, "theoretical_loss": 3.5045303951432194, "tokens_seen": 1710751744 }, { "epoch": 0.52, "learning_rate": 0.0004863585299309902, "loss": 0.068, "theoretical_loss": 3.504484612515136, "tokens_seen": 1711013888 }, { "epoch": 0.52, "learning_rate": 0.0004862782859894078, "loss": 0.07, "theoretical_loss": 3.5044388388645333, "tokens_seen": 1711276032 }, { "epoch": 0.52, "learning_rate": 0.0004861980420478254, "loss": 0.0651, "theoretical_loss": 3.5043930741882763, "tokens_seen": 1711538176 }, { "epoch": 0.52, "learning_rate": 0.00048611779810624304, "loss": 0.0695, "theoretical_loss": 3.504347318483232, "tokens_seen": 1711800320 }, { "epoch": 0.52, "learning_rate": 0.00048603755416466054, "loss": 0.071, "theoretical_loss": 3.504301571746267, "tokens_seen": 1712062464 }, { "epoch": 0.52, "learning_rate": 0.00048595731022307815, "loss": 0.0694, "theoretical_loss": 3.504255833974252, "tokens_seen": 1712324608 }, { "epoch": 0.52, "learning_rate": 0.00048587706628149576, "loss": 0.0683, "theoretical_loss": 3.5042101051640575, "tokens_seen": 1712586752 }, { "epoch": 0.52, "learning_rate": 0.0004857968223399133, "loss": 0.0707, "theoretical_loss": 3.5041643853125564, "tokens_seen": 1712848896 }, { "epoch": 0.52, "learning_rate": 0.00048571657839833094, "loss": 0.0698, "theoretical_loss": 3.504118674416623, "tokens_seen": 1713111040 }, { "epoch": 0.52, "learning_rate": 0.00048563633445674855, "loss": 0.0706, "theoretical_loss": 3.504072972473133, "tokens_seen": 1713373184 }, { "epoch": 0.52, "learning_rate": 0.00048555609051516616, "loss": 0.0671, "theoretical_loss": 3.504027279478964, "tokens_seen": 1713635328 }, { "epoch": 0.52, "learning_rate": 0.00048547584657358366, "loss": 0.0689, "theoretical_loss": 3.5039815954309943, "tokens_seen": 1713897472 }, { "epoch": 0.52, "learning_rate": 0.0004853956026320013, "loss": 0.0678, "theoretical_loss": 3.5039359203261045, "tokens_seen": 1714159616 }, { "epoch": 0.52, "learning_rate": 0.0004853153586904189, "loss": 0.0678, "theoretical_loss": 3.5038902541611776, "tokens_seen": 1714421760 }, { "epoch": 0.52, "learning_rate": 0.00048523511474883645, "loss": 0.0685, "theoretical_loss": 3.503844596933096, "tokens_seen": 1714683904 }, { "epoch": 0.52, "learning_rate": 0.00048515487080725406, "loss": 0.0709, "theoretical_loss": 3.503798948638746, "tokens_seen": 1714946048 }, { "epoch": 0.52, "learning_rate": 0.0004850746268656717, "loss": 0.0715, "theoretical_loss": 3.503753309275013, "tokens_seen": 1715208192 }, { "epoch": 0.52, "learning_rate": 0.0004849943829240893, "loss": 0.072, "theoretical_loss": 3.5037076788387864, "tokens_seen": 1715470336 }, { "epoch": 0.52, "learning_rate": 0.00048491413898250684, "loss": 0.0689, "theoretical_loss": 3.503662057326956, "tokens_seen": 1715732480 }, { "epoch": 0.52, "learning_rate": 0.0004848338950409244, "loss": 0.0688, "theoretical_loss": 3.503616444736412, "tokens_seen": 1715994624 }, { "epoch": 0.52, "learning_rate": 0.000484753651099342, "loss": 0.0675, "theoretical_loss": 3.5035708410640494, "tokens_seen": 1716256768 }, { "epoch": 0.52, "learning_rate": 0.0004846734071577596, "loss": 0.0687, "theoretical_loss": 3.5035252463067614, "tokens_seen": 1716518912 }, { "epoch": 0.52, "learning_rate": 0.0004845931632161772, "loss": 0.0686, "theoretical_loss": 3.503479660461444, "tokens_seen": 1716781056 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.0005554624949581921, "objective/train/docs_used": 627098, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2948997020721436, "objective/train/original_loss": 1.2948997020721436, "objective/train/theoretical_loss": 3.5034340835249953, "objective/train/tokens_used": 1737503200, "objective/train/value_avg": -0.008697509765625, "objective/train/value_loss": 0.0003120089240837842, "objective/train/value_max": -3.2961368560791016e-05, "objective/train/value_min": -0.9716796875, "objective/train/value_reward_corr": 0.7148762752931084, "objective/train/value_std": 0.0185394287109375, "objective/train/weight_avg": 1.0006957054138184, "objective/train/weighted_lm_loss": 1.2951641082763672, "objective/train/weights_max": 2.2991042137145996, "objective/train/weights_min": 0.3745975196361542, "theoretical_loss": 3.5034340835249953, "tokens_seen": 1717043200 }, { "epoch": 0.52, "learning_rate": 0.0004845129192745948, "loss": 0.0663, "theoretical_loss": 3.5034340835249953, "tokens_seen": 1717043200 }, { "epoch": 0.52, "learning_rate": 0.00048443267533301236, "loss": 0.0684, "theoretical_loss": 3.503388515494315, "tokens_seen": 1717305344 }, { "epoch": 0.52, "learning_rate": 0.00048435243139142997, "loss": 0.0681, "theoretical_loss": 3.5033429563663026, "tokens_seen": 1717567488 }, { "epoch": 0.52, "learning_rate": 0.00048427218744984753, "loss": 0.0709, "theoretical_loss": 3.503297406137861, "tokens_seen": 1717829632 }, { "epoch": 0.52, "learning_rate": 0.00048419194350826514, "loss": 0.0691, "theoretical_loss": 3.5032518648058946, "tokens_seen": 1718091776 }, { "epoch": 0.52, "learning_rate": 0.0004841116995666827, "loss": 0.0664, "theoretical_loss": 3.503206332367308, "tokens_seen": 1718353920 }, { "epoch": 0.52, "learning_rate": 0.0004840314556251003, "loss": 0.0682, "theoretical_loss": 3.5031608088190085, "tokens_seen": 1718616064 }, { "epoch": 0.52, "learning_rate": 0.0004839512116835179, "loss": 0.0674, "theoretical_loss": 3.5031152941579045, "tokens_seen": 1718878208 }, { "epoch": 0.52, "learning_rate": 0.0004838709677419355, "loss": 0.0674, "theoretical_loss": 3.5030697883809063, "tokens_seen": 1719140352 }, { "epoch": 0.52, "learning_rate": 0.0004837907238003531, "loss": 0.0697, "theoretical_loss": 3.5030242914849254, "tokens_seen": 1719402496 }, { "epoch": 0.52, "learning_rate": 0.00048371047985877065, "loss": 0.0709, "theoretical_loss": 3.502978803466875, "tokens_seen": 1719664640 }, { "epoch": 0.52, "learning_rate": 0.00048363023591718827, "loss": 0.0704, "theoretical_loss": 3.502933324323669, "tokens_seen": 1719926784 }, { "epoch": 0.52, "learning_rate": 0.0004835499919756058, "loss": 0.0706, "theoretical_loss": 3.502887854052225, "tokens_seen": 1720188928 }, { "epoch": 0.52, "learning_rate": 0.00048346974803402344, "loss": 0.0689, "theoretical_loss": 3.5028423926494594, "tokens_seen": 1720451072 }, { "epoch": 0.52, "learning_rate": 0.00048338950409244105, "loss": 0.0686, "theoretical_loss": 3.5027969401122925, "tokens_seen": 1720713216 }, { "epoch": 0.52, "learning_rate": 0.0004833092601508586, "loss": 0.0688, "theoretical_loss": 3.5027514964376447, "tokens_seen": 1720975360 }, { "epoch": 0.52, "learning_rate": 0.0004832290162092762, "loss": 0.0703, "theoretical_loss": 3.502706061622438, "tokens_seen": 1721237504 }, { "epoch": 0.52, "learning_rate": 0.00048314877226769383, "loss": 0.0684, "theoretical_loss": 3.502660635663597, "tokens_seen": 1721499648 }, { "epoch": 0.52, "learning_rate": 0.0004830685283261114, "loss": 0.0699, "theoretical_loss": 3.5026152185580472, "tokens_seen": 1721761792 }, { "epoch": 0.52, "learning_rate": 0.00048298828438452895, "loss": 0.0686, "theoretical_loss": 3.502569810302715, "tokens_seen": 1722023936 }, { "epoch": 0.52, "learning_rate": 0.00048290804044294656, "loss": 0.0683, "theoretical_loss": 3.5025244108945293, "tokens_seen": 1722286080 }, { "epoch": 0.52, "learning_rate": 0.0004828277965013642, "loss": 0.0681, "theoretical_loss": 3.50247902033042, "tokens_seen": 1722548224 }, { "epoch": 0.52, "learning_rate": 0.00048274755255978173, "loss": 0.0704, "theoretical_loss": 3.502433638607319, "tokens_seen": 1722810368 }, { "epoch": 0.52, "learning_rate": 0.00048266730861819935, "loss": 0.0693, "theoretical_loss": 3.502388265722159, "tokens_seen": 1723072512 }, { "epoch": 0.52, "learning_rate": 0.00048258706467661696, "loss": 0.0699, "theoretical_loss": 3.5023429016718755, "tokens_seen": 1723334656 }, { "epoch": 0.52, "objective/train/advantage_avg": -2.2113810700830072e-05, "objective/train/docs_used": 629447, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4110980033874512, "objective/train/original_loss": 1.4110978841781616, "objective/train/theoretical_loss": 3.502297546453403, "objective/train/tokens_used": 1744056800, "objective/train/value_avg": -0.005741119384765625, "objective/train/value_loss": 0.00019440604955889285, "objective/train/value_max": -4.83393669128418e-05, "objective/train/value_min": -0.241455078125, "objective/train/value_reward_corr": 0.6586684600559165, "objective/train/value_std": 0.01081085205078125, "objective/train/weight_avg": 1.0000641345977783, "objective/train/weighted_lm_loss": 1.411281704902649, "objective/train/weights_max": 1.1972782611846924, "objective/train/weights_min": 0.3683987259864807, "theoretical_loss": 3.502297546453403, "tokens_seen": 1723596800 }, { "epoch": 0.52, "learning_rate": 0.00048250682073503446, "loss": 0.0704, "theoretical_loss": 3.502297546453403, "tokens_seen": 1723596800 }, { "epoch": 0.52, "learning_rate": 0.0004824265767934521, "loss": 0.0682, "theoretical_loss": 3.5022522000636807, "tokens_seen": 1723858944 }, { "epoch": 0.52, "learning_rate": 0.0004823463328518697, "loss": 0.0701, "theoretical_loss": 3.502206862499647, "tokens_seen": 1724121088 }, { "epoch": 0.52, "learning_rate": 0.0004822660889102873, "loss": 0.0683, "theoretical_loss": 3.5021615337582435, "tokens_seen": 1724383232 }, { "epoch": 0.52, "learning_rate": 0.00048218584496870486, "loss": 0.0686, "theoretical_loss": 3.5021162138364117, "tokens_seen": 1724645376 }, { "epoch": 0.52, "learning_rate": 0.0004821056010271225, "loss": 0.0692, "theoretical_loss": 3.502070902731096, "tokens_seen": 1724907520 }, { "epoch": 0.52, "learning_rate": 0.0004820253570855401, "loss": 0.0715, "theoretical_loss": 3.502025600439241, "tokens_seen": 1725169664 }, { "epoch": 0.52, "learning_rate": 0.0004819451131439576, "loss": 0.0717, "theoretical_loss": 3.5019803069577944, "tokens_seen": 1725431808 }, { "epoch": 0.52, "learning_rate": 0.0004818648692023752, "loss": 0.0693, "theoretical_loss": 3.5019350222837042, "tokens_seen": 1725693952 }, { "epoch": 0.52, "learning_rate": 0.0004817846252607928, "loss": 0.07, "theoretical_loss": 3.5018897464139203, "tokens_seen": 1725956096 }, { "epoch": 0.52, "learning_rate": 0.00048170438131921043, "loss": 0.0717, "theoretical_loss": 3.501844479345394, "tokens_seen": 1726218240 }, { "epoch": 0.52, "learning_rate": 0.000481624137377628, "loss": 0.069, "theoretical_loss": 3.5017992210750783, "tokens_seen": 1726480384 }, { "epoch": 0.52, "learning_rate": 0.0004815438934360456, "loss": 0.0716, "theoretical_loss": 3.5017539715999275, "tokens_seen": 1726742528 }, { "epoch": 0.52, "learning_rate": 0.0004814636494944632, "loss": 0.0697, "theoretical_loss": 3.5017087309168975, "tokens_seen": 1727004672 }, { "epoch": 0.52, "learning_rate": 0.00048138340555288077, "loss": 0.0706, "theoretical_loss": 3.501663499022947, "tokens_seen": 1727266816 }, { "epoch": 0.52, "learning_rate": 0.00048130316161129833, "loss": 0.068, "theoretical_loss": 3.5016182759150336, "tokens_seen": 1727528960 }, { "epoch": 0.52, "learning_rate": 0.00048122291766971594, "loss": 0.0687, "theoretical_loss": 3.501573061590118, "tokens_seen": 1727791104 }, { "epoch": 0.52, "learning_rate": 0.00048114267372813355, "loss": 0.0678, "theoretical_loss": 3.5015278560451626, "tokens_seen": 1728053248 }, { "epoch": 0.52, "learning_rate": 0.0004810624297865511, "loss": 0.069, "theoretical_loss": 3.501482659277131, "tokens_seen": 1728315392 }, { "epoch": 0.52, "learning_rate": 0.0004809821858449687, "loss": 0.0698, "theoretical_loss": 3.501437471282988, "tokens_seen": 1728577536 }, { "epoch": 0.52, "learning_rate": 0.00048090194190338634, "loss": 0.067, "theoretical_loss": 3.5013922920597, "tokens_seen": 1728839680 }, { "epoch": 0.52, "learning_rate": 0.0004808216979618039, "loss": 0.0687, "theoretical_loss": 3.5013471216042356, "tokens_seen": 1729101824 }, { "epoch": 0.52, "learning_rate": 0.00048074145402022145, "loss": 0.0665, "theoretical_loss": 3.5013019599135635, "tokens_seen": 1729363968 }, { "epoch": 0.52, "learning_rate": 0.00048066121007863907, "loss": 0.0698, "theoretical_loss": 3.501256806984656, "tokens_seen": 1729626112 }, { "epoch": 0.52, "learning_rate": 0.0004805809661370567, "loss": 0.0698, "theoretical_loss": 3.5012116628144847, "tokens_seen": 1729888256 }, { "epoch": 0.52, "objective/train/advantage_avg": -0.00018954773258883506, "objective/train/docs_used": 631803, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4000455141067505, "objective/train/original_loss": 1.4000455141067505, "objective/train/theoretical_loss": 3.5011665274000245, "objective/train/tokens_used": 1750610400, "objective/train/value_avg": -0.00847625732421875, "objective/train/value_loss": 0.0005068992613814771, "objective/train/value_max": -4.792213439941406e-05, "objective/train/value_min": -0.79736328125, "objective/train/value_reward_corr": 0.705849457365212, "objective/train/value_std": 0.02032470703125, "objective/train/weight_avg": 1.0000356435775757, "objective/train/weighted_lm_loss": 1.4004305601119995, "objective/train/weights_max": 1.8320720195770264, "objective/train/weights_min": 0.37181854248046875, "theoretical_loss": 3.5011665274000245, "tokens_seen": 1730150400 }, { "epoch": 0.52, "learning_rate": 0.00048050072219547424, "loss": 0.0667, "theoretical_loss": 3.5011665274000245, "tokens_seen": 1730150400 }, { "epoch": 0.52, "learning_rate": 0.00048042047825389185, "loss": 0.0721, "theoretical_loss": 3.50112140073825, "tokens_seen": 1730412544 }, { "epoch": 0.52, "learning_rate": 0.00048034023431230946, "loss": 0.0692, "theoretical_loss": 3.501076282826139, "tokens_seen": 1730674688 }, { "epoch": 0.52, "learning_rate": 0.000480259990370727, "loss": 0.0694, "theoretical_loss": 3.5010311736606696, "tokens_seen": 1730936832 }, { "epoch": 0.52, "learning_rate": 0.00048017974642914463, "loss": 0.0709, "theoretical_loss": 3.5009860732388223, "tokens_seen": 1731198976 }, { "epoch": 0.52, "learning_rate": 0.0004800995024875622, "loss": 0.0705, "theoretical_loss": 3.5009409815575787, "tokens_seen": 1731461120 }, { "epoch": 0.52, "learning_rate": 0.00048001925854597975, "loss": 0.0676, "theoretical_loss": 3.500895898613922, "tokens_seen": 1731723264 }, { "epoch": 0.52, "learning_rate": 0.00047993901460439736, "loss": 0.0709, "theoretical_loss": 3.5008508244048366, "tokens_seen": 1731985408 }, { "epoch": 0.52, "learning_rate": 0.000479858770662815, "loss": 0.0688, "theoretical_loss": 3.5008057589273083, "tokens_seen": 1732247552 }, { "epoch": 0.53, "learning_rate": 0.0004797785267212326, "loss": 0.0674, "theoretical_loss": 3.500760702178325, "tokens_seen": 1732509696 }, { "epoch": 0.53, "learning_rate": 0.00047969828277965015, "loss": 0.0655, "theoretical_loss": 3.5007156541548756, "tokens_seen": 1732771840 }, { "epoch": 0.53, "learning_rate": 0.00047961803883806776, "loss": 0.0708, "theoretical_loss": 3.5006706148539513, "tokens_seen": 1733033984 }, { "epoch": 0.53, "learning_rate": 0.0004795377948964853, "loss": 0.0717, "theoretical_loss": 3.500625584272543, "tokens_seen": 1733296128 }, { "epoch": 0.53, "learning_rate": 0.0004794575509549029, "loss": 0.0682, "theoretical_loss": 3.5005805624076456, "tokens_seen": 1733558272 }, { "epoch": 0.53, "learning_rate": 0.0004793773070133205, "loss": 0.0688, "theoretical_loss": 3.500535549256253, "tokens_seen": 1733820416 }, { "epoch": 0.53, "learning_rate": 0.0004792970630717381, "loss": 0.0672, "theoretical_loss": 3.5004905448153627, "tokens_seen": 1734082560 }, { "epoch": 0.53, "learning_rate": 0.0004792168191301557, "loss": 0.0705, "theoretical_loss": 3.500445549081972, "tokens_seen": 1734344704 }, { "epoch": 0.53, "learning_rate": 0.00047913657518857327, "loss": 0.0691, "theoretical_loss": 3.500400562053081, "tokens_seen": 1734606848 }, { "epoch": 0.53, "learning_rate": 0.0004790563312469909, "loss": 0.0699, "theoretical_loss": 3.50035558372569, "tokens_seen": 1734868992 }, { "epoch": 0.53, "learning_rate": 0.00047897608730540844, "loss": 0.071, "theoretical_loss": 3.5003106140968026, "tokens_seen": 1735131136 }, { "epoch": 0.53, "learning_rate": 0.000478895843363826, "loss": 0.0701, "theoretical_loss": 3.5002656531634213, "tokens_seen": 1735393280 }, { "epoch": 0.53, "learning_rate": 0.0004788155994222436, "loss": 0.0693, "theoretical_loss": 3.5002207009225526, "tokens_seen": 1735655424 }, { "epoch": 0.53, "learning_rate": 0.0004787353554806612, "loss": 0.0684, "theoretical_loss": 3.5001757573712036, "tokens_seen": 1735917568 }, { "epoch": 0.53, "learning_rate": 0.00047865511153907884, "loss": 0.0701, "theoretical_loss": 3.5001308225063816, "tokens_seen": 1736179712 }, { "epoch": 0.53, "learning_rate": 0.0004785748675974964, "loss": 0.0689, "theoretical_loss": 3.500085896325098, "tokens_seen": 1736441856 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.0006846745382063091, "objective/train/docs_used": 634251, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4682844877243042, "objective/train/original_loss": 1.4682844877243042, "objective/train/theoretical_loss": 3.5000409788243623, "objective/train/tokens_used": 1757164000, "objective/train/value_avg": -0.00629425048828125, "objective/train/value_loss": 0.00014210244989953935, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.6083984375, "objective/train/value_reward_corr": 0.7120896194157069, "objective/train/value_std": 0.0111541748046875, "objective/train/weight_avg": 1.0007526874542236, "objective/train/weighted_lm_loss": 1.4696582555770874, "objective/train/weights_max": 1.7635035514831543, "objective/train/weights_min": 0.37317126989364624, "theoretical_loss": 3.5000409788243623, "tokens_seen": 1736704000 }, { "epoch": 0.53, "learning_rate": 0.000478494623655914, "loss": 0.0691, "theoretical_loss": 3.5000409788243623, "tokens_seen": 1736704000 }, { "epoch": 0.53, "learning_rate": 0.0004784143797143316, "loss": 0.0707, "theoretical_loss": 3.4999960700011896, "tokens_seen": 1736966144 }, { "epoch": 0.53, "learning_rate": 0.00047833413577274913, "loss": 0.0697, "theoretical_loss": 3.4999511698525927, "tokens_seen": 1737228288 }, { "epoch": 0.53, "learning_rate": 0.00047825389183116674, "loss": 0.0692, "theoretical_loss": 3.4999062783755877, "tokens_seen": 1737490432 }, { "epoch": 0.53, "learning_rate": 0.00047817364788958435, "loss": 0.0675, "theoretical_loss": 3.4998613955671924, "tokens_seen": 1737752576 }, { "epoch": 0.53, "learning_rate": 0.0004780934039480019, "loss": 0.0682, "theoretical_loss": 3.4998165214244246, "tokens_seen": 1738014720 }, { "epoch": 0.53, "learning_rate": 0.0004780131600064195, "loss": 0.0694, "theoretical_loss": 3.499771655944306, "tokens_seen": 1738276864 }, { "epoch": 0.53, "learning_rate": 0.00047793291606483714, "loss": 0.0682, "theoretical_loss": 3.4997267991238568, "tokens_seen": 1738539008 }, { "epoch": 0.53, "learning_rate": 0.00047785267212325475, "loss": 0.0669, "theoretical_loss": 3.4996819509601016, "tokens_seen": 1738801152 }, { "epoch": 0.53, "learning_rate": 0.00047777242818167225, "loss": 0.0672, "theoretical_loss": 3.499637111450064, "tokens_seen": 1739063296 }, { "epoch": 0.53, "learning_rate": 0.00047769218424008987, "loss": 0.0707, "theoretical_loss": 3.499592280590771, "tokens_seen": 1739325440 }, { "epoch": 0.53, "learning_rate": 0.0004776119402985075, "loss": 0.071, "theoretical_loss": 3.499547458379249, "tokens_seen": 1739587584 }, { "epoch": 0.53, "learning_rate": 0.00047753169635692504, "loss": 0.0693, "theoretical_loss": 3.4995026448125284, "tokens_seen": 1739849728 }, { "epoch": 0.53, "learning_rate": 0.00047745145241534265, "loss": 0.0669, "theoretical_loss": 3.499457839887639, "tokens_seen": 1740111872 }, { "epoch": 0.53, "learning_rate": 0.00047737120847376026, "loss": 0.0674, "theoretical_loss": 3.4994130436016126, "tokens_seen": 1740374016 }, { "epoch": 0.53, "learning_rate": 0.0004772909645321779, "loss": 0.0694, "theoretical_loss": 3.4993682559514836, "tokens_seen": 1740636160 }, { "epoch": 0.53, "learning_rate": 0.0004772107205905954, "loss": 0.0689, "theoretical_loss": 3.4993234769342862, "tokens_seen": 1740898304 }, { "epoch": 0.53, "learning_rate": 0.000477130476649013, "loss": 0.0674, "theoretical_loss": 3.4992787065470567, "tokens_seen": 1741160448 }, { "epoch": 0.53, "learning_rate": 0.0004770502327074306, "loss": 0.0674, "theoretical_loss": 3.499233944786834, "tokens_seen": 1741422592 }, { "epoch": 0.53, "learning_rate": 0.00047696998876584816, "loss": 0.068, "theoretical_loss": 3.499189191650656, "tokens_seen": 1741684736 }, { "epoch": 0.53, "learning_rate": 0.0004768897448242658, "loss": 0.0684, "theoretical_loss": 3.499144447135565, "tokens_seen": 1741946880 }, { "epoch": 0.53, "learning_rate": 0.0004768095008826834, "loss": 0.0716, "theoretical_loss": 3.499099711238602, "tokens_seen": 1742209024 }, { "epoch": 0.53, "learning_rate": 0.000476729256941101, "loss": 0.0676, "theoretical_loss": 3.4990549839568112, "tokens_seen": 1742471168 }, { "epoch": 0.53, "learning_rate": 0.00047664901299951856, "loss": 0.0704, "theoretical_loss": 3.4990102652872377, "tokens_seen": 1742733312 }, { "epoch": 0.53, "learning_rate": 0.0004765687690579361, "loss": 0.0712, "theoretical_loss": 3.4989655552269285, "tokens_seen": 1742995456 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.0005523924482986331, "objective/train/docs_used": 636817, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.377171277999878, "objective/train/original_loss": 1.377171277999878, "objective/train/theoretical_loss": 3.4989208537729315, "objective/train/tokens_used": 1763717600, "objective/train/value_avg": -0.00604248046875, "objective/train/value_loss": 0.00014253681001719087, "objective/train/value_max": -4.57763671875e-05, "objective/train/value_min": -0.94677734375, "objective/train/value_reward_corr": 0.7605732795954262, "objective/train/value_std": 0.01490020751953125, "objective/train/weight_avg": 1.0006181001663208, "objective/train/weighted_lm_loss": 1.378185510635376, "objective/train/weights_max": 1.3232762813568115, "objective/train/weights_min": 0.37375253438949585, "theoretical_loss": 3.4989208537729315, "tokens_seen": 1743257600 }, { "epoch": 0.53, "learning_rate": 0.00047648852511635373, "loss": 0.0709, "theoretical_loss": 3.4989208537729315, "tokens_seen": 1743257600 }, { "epoch": 0.53, "learning_rate": 0.0004764082811747713, "loss": 0.0703, "theoretical_loss": 3.498876160922296, "tokens_seen": 1743519744 }, { "epoch": 0.53, "learning_rate": 0.0004763280372331889, "loss": 0.0697, "theoretical_loss": 3.4988314766720734, "tokens_seen": 1743781888 }, { "epoch": 0.53, "learning_rate": 0.0004762477932916065, "loss": 0.0704, "theoretical_loss": 3.4987868010193157, "tokens_seen": 1744044032 }, { "epoch": 0.53, "learning_rate": 0.00047616754935002407, "loss": 0.0647, "theoretical_loss": 3.4987421339610774, "tokens_seen": 1744306176 }, { "epoch": 0.53, "learning_rate": 0.0004760873054084417, "loss": 0.0682, "theoretical_loss": 3.498697475494413, "tokens_seen": 1744568320 }, { "epoch": 0.53, "learning_rate": 0.00047600706146685924, "loss": 0.0694, "theoretical_loss": 3.49865282561638, "tokens_seen": 1744830464 }, { "epoch": 0.53, "learning_rate": 0.00047592681752527686, "loss": 0.07, "theoretical_loss": 3.498608184324037, "tokens_seen": 1745092608 }, { "epoch": 0.53, "learning_rate": 0.0004758465735836944, "loss": 0.0699, "theoretical_loss": 3.498563551614443, "tokens_seen": 1745354752 }, { "epoch": 0.53, "learning_rate": 0.000475766329642112, "loss": 0.0685, "theoretical_loss": 3.4985189274846586, "tokens_seen": 1745616896 }, { "epoch": 0.53, "learning_rate": 0.00047568608570052964, "loss": 0.0711, "theoretical_loss": 3.498474311931748, "tokens_seen": 1745879040 }, { "epoch": 0.53, "learning_rate": 0.0004756058417589472, "loss": 0.071, "theoretical_loss": 3.4984297049527737, "tokens_seen": 1746141184 }, { "epoch": 0.53, "learning_rate": 0.0004755255978173648, "loss": 0.0696, "theoretical_loss": 3.498385106544802, "tokens_seen": 1746403328 }, { "epoch": 0.53, "learning_rate": 0.0004754453538757824, "loss": 0.0667, "theoretical_loss": 3.4983405167049004, "tokens_seen": 1746665472 }, { "epoch": 0.53, "learning_rate": 0.0004753651099342, "loss": 0.0692, "theoretical_loss": 3.4982959354301366, "tokens_seen": 1746927616 }, { "epoch": 0.53, "learning_rate": 0.00047528486599261754, "loss": 0.0651, "theoretical_loss": 3.49825136271758, "tokens_seen": 1747189760 }, { "epoch": 0.53, "learning_rate": 0.00047520462205103515, "loss": 0.0704, "theoretical_loss": 3.498206798564303, "tokens_seen": 1747451904 }, { "epoch": 0.53, "learning_rate": 0.00047512437810945276, "loss": 0.0688, "theoretical_loss": 3.498162242967377, "tokens_seen": 1747714048 }, { "epoch": 0.53, "learning_rate": 0.0004750441341678703, "loss": 0.0699, "theoretical_loss": 3.4981176959238773, "tokens_seen": 1747976192 }, { "epoch": 0.53, "learning_rate": 0.00047496389022628794, "loss": 0.0676, "theoretical_loss": 3.4980731574308788, "tokens_seen": 1748238336 }, { "epoch": 0.53, "learning_rate": 0.00047488364628470555, "loss": 0.0657, "theoretical_loss": 3.498028627485459, "tokens_seen": 1748500480 }, { "epoch": 0.53, "learning_rate": 0.0004748034023431231, "loss": 0.0687, "theoretical_loss": 3.497984106084696, "tokens_seen": 1748762624 }, { "epoch": 0.53, "learning_rate": 0.00047472315840154066, "loss": 0.0716, "theoretical_loss": 3.4979395932256696, "tokens_seen": 1749024768 }, { "epoch": 0.53, "learning_rate": 0.0004746429144599583, "loss": 0.0677, "theoretical_loss": 3.4978950889054614, "tokens_seen": 1749286912 }, { "epoch": 0.53, "learning_rate": 0.0004745626705183759, "loss": 0.0684, "theoretical_loss": 3.4978505931211545, "tokens_seen": 1749549056 }, { "epoch": 0.53, "objective/train/advantage_avg": 4.875126978731714e-05, "objective/train/docs_used": 639149, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4085463285446167, "objective/train/original_loss": 1.4085463285446167, "objective/train/theoretical_loss": 3.4978061058698326, "objective/train/tokens_used": 1770271200, "objective/train/value_avg": -0.00791168212890625, "objective/train/value_loss": 0.00021851272322237492, "objective/train/value_max": -4.6133995056152344e-05, "objective/train/value_min": -0.2459716796875, "objective/train/value_reward_corr": 0.7395755158288367, "objective/train/value_std": 0.015655517578125, "objective/train/weight_avg": 1.0001466274261475, "objective/train/weighted_lm_loss": 1.4086047410964966, "objective/train/weights_max": 1.2113949060440063, "objective/train/weights_min": 0.36827927827835083, "theoretical_loss": 3.4978061058698326, "tokens_seen": 1749811200 }, { "epoch": 0.53, "learning_rate": 0.00047448242657679345, "loss": 0.0693, "theoretical_loss": 3.4978061058698326, "tokens_seen": 1749811200 }, { "epoch": 0.53, "learning_rate": 0.00047440218263521106, "loss": 0.072, "theoretical_loss": 3.4977616271485816, "tokens_seen": 1750073344 }, { "epoch": 0.53, "learning_rate": 0.0004743219386936287, "loss": 0.0688, "theoretical_loss": 3.4977171569544883, "tokens_seen": 1750335488 }, { "epoch": 0.53, "learning_rate": 0.0004742416947520462, "loss": 0.0672, "theoretical_loss": 3.497672695284641, "tokens_seen": 1750597632 }, { "epoch": 0.53, "learning_rate": 0.0004741614508104638, "loss": 0.0684, "theoretical_loss": 3.497628242136131, "tokens_seen": 1750859776 }, { "epoch": 0.53, "learning_rate": 0.0004740812068688814, "loss": 0.0676, "theoretical_loss": 3.497583797506048, "tokens_seen": 1751121920 }, { "epoch": 0.53, "learning_rate": 0.000474000962927299, "loss": 0.0701, "theoretical_loss": 3.4975393613914854, "tokens_seen": 1751384064 }, { "epoch": 0.53, "learning_rate": 0.0004739207189857166, "loss": 0.0691, "theoretical_loss": 3.497494933789538, "tokens_seen": 1751646208 }, { "epoch": 0.53, "learning_rate": 0.0004738404750441342, "loss": 0.071, "theoretical_loss": 3.4974505146973005, "tokens_seen": 1751908352 }, { "epoch": 0.53, "learning_rate": 0.0004737602311025518, "loss": 0.0722, "theoretical_loss": 3.497406104111871, "tokens_seen": 1752170496 }, { "epoch": 0.53, "learning_rate": 0.00047367998716096936, "loss": 0.0709, "theoretical_loss": 3.4973617020303465, "tokens_seen": 1752432640 }, { "epoch": 0.53, "learning_rate": 0.0004735997432193869, "loss": 0.0704, "theoretical_loss": 3.497317308449828, "tokens_seen": 1752694784 }, { "epoch": 0.53, "learning_rate": 0.00047351949927780453, "loss": 0.0659, "theoretical_loss": 3.4972729233674174, "tokens_seen": 1752956928 }, { "epoch": 0.53, "learning_rate": 0.00047343925533622214, "loss": 0.0681, "theoretical_loss": 3.4972285467802164, "tokens_seen": 1753219072 }, { "epoch": 0.53, "learning_rate": 0.0004733590113946397, "loss": 0.0708, "theoretical_loss": 3.4971841786853295, "tokens_seen": 1753481216 }, { "epoch": 0.53, "learning_rate": 0.0004732787674530573, "loss": 0.0707, "theoretical_loss": 3.4971398190798615, "tokens_seen": 1753743360 }, { "epoch": 0.53, "learning_rate": 0.0004731985235114749, "loss": 0.0687, "theoretical_loss": 3.4970954679609214, "tokens_seen": 1754005504 }, { "epoch": 0.53, "learning_rate": 0.0004731182795698925, "loss": 0.0725, "theoretical_loss": 3.4970511253256156, "tokens_seen": 1754267648 }, { "epoch": 0.53, "learning_rate": 0.00047303803562831004, "loss": 0.0718, "theoretical_loss": 3.4970067911710556, "tokens_seen": 1754529792 }, { "epoch": 0.53, "learning_rate": 0.00047295779168672765, "loss": 0.0726, "theoretical_loss": 3.4969624654943514, "tokens_seen": 1754791936 }, { "epoch": 0.53, "learning_rate": 0.00047287754774514527, "loss": 0.0696, "theoretical_loss": 3.496918148292616, "tokens_seen": 1755054080 }, { "epoch": 0.53, "learning_rate": 0.0004727973038035628, "loss": 0.0701, "theoretical_loss": 3.496873839562964, "tokens_seen": 1755316224 }, { "epoch": 0.53, "learning_rate": 0.00047271705986198044, "loss": 0.0692, "theoretical_loss": 3.49682953930251, "tokens_seen": 1755578368 }, { "epoch": 0.53, "learning_rate": 0.00047263681592039805, "loss": 0.0686, "theoretical_loss": 3.4967852475083725, "tokens_seen": 1755840512 }, { "epoch": 0.53, "learning_rate": 0.0004725565719788156, "loss": 0.07, "theoretical_loss": 3.4967409641776683, "tokens_seen": 1756102656 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.000851556018460542, "objective/train/docs_used": 641471, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.289823293685913, "objective/train/original_loss": 1.289823293685913, "objective/train/theoretical_loss": 3.4966966893075178, "objective/train/tokens_used": 1776824800, "objective/train/value_avg": -0.00839996337890625, "objective/train/value_loss": 0.00039937844849191606, "objective/train/value_max": -6.014108657836914e-05, "objective/train/value_min": -0.80615234375, "objective/train/value_reward_corr": 0.7816001366928128, "objective/train/value_std": 0.0249481201171875, "objective/train/weight_avg": 1.0010340213775635, "objective/train/weighted_lm_loss": 1.2905949354171753, "objective/train/weights_max": 1.8249680995941162, "objective/train/weights_min": 0.36997780203819275, "theoretical_loss": 3.4966966893075178, "tokens_seen": 1756364800 }, { "epoch": 0.53, "learning_rate": 0.00047247632803723317, "loss": 0.0674, "theoretical_loss": 3.4966966893075178, "tokens_seen": 1756364800 }, { "epoch": 0.53, "learning_rate": 0.0004723960840956508, "loss": 0.0704, "theoretical_loss": 3.496652422895042, "tokens_seen": 1756626944 }, { "epoch": 0.53, "learning_rate": 0.0004723158401540684, "loss": 0.0691, "theoretical_loss": 3.496608164937364, "tokens_seen": 1756889088 }, { "epoch": 0.53, "learning_rate": 0.00047223559621248595, "loss": 0.0706, "theoretical_loss": 3.496563915431607, "tokens_seen": 1757151232 }, { "epoch": 0.53, "learning_rate": 0.00047215535227090356, "loss": 0.0707, "theoretical_loss": 3.4965196743748965, "tokens_seen": 1757413376 }, { "epoch": 0.53, "learning_rate": 0.0004720751083293212, "loss": 0.0709, "theoretical_loss": 3.49647544176436, "tokens_seen": 1757675520 }, { "epoch": 0.53, "learning_rate": 0.00047199486438773873, "loss": 0.0731, "theoretical_loss": 3.4964312175971246, "tokens_seen": 1757937664 }, { "epoch": 0.53, "learning_rate": 0.00047191462044615635, "loss": 0.0721, "theoretical_loss": 3.496387001870321, "tokens_seen": 1758199808 }, { "epoch": 0.53, "learning_rate": 0.0004718343765045739, "loss": 0.0707, "theoretical_loss": 3.49634279458108, "tokens_seen": 1758461952 }, { "epoch": 0.53, "learning_rate": 0.00047175413256299146, "loss": 0.0684, "theoretical_loss": 3.4962985957265333, "tokens_seen": 1758724096 }, { "epoch": 0.53, "learning_rate": 0.0004716738886214091, "loss": 0.0681, "theoretical_loss": 3.496254405303815, "tokens_seen": 1758986240 }, { "epoch": 0.53, "learning_rate": 0.0004715936446798267, "loss": 0.0704, "theoretical_loss": 3.4962102233100607, "tokens_seen": 1759248384 }, { "epoch": 0.53, "learning_rate": 0.0004715134007382443, "loss": 0.0728, "theoretical_loss": 3.4961660497424063, "tokens_seen": 1759510528 }, { "epoch": 0.53, "learning_rate": 0.00047143315679666186, "loss": 0.0702, "theoretical_loss": 3.496121884597991, "tokens_seen": 1759772672 }, { "epoch": 0.53, "learning_rate": 0.0004713529128550795, "loss": 0.0698, "theoretical_loss": 3.4960777278739528, "tokens_seen": 1760034816 }, { "epoch": 0.53, "learning_rate": 0.00047127266891349703, "loss": 0.0722, "theoretical_loss": 3.4960335795674338, "tokens_seen": 1760296960 }, { "epoch": 0.53, "learning_rate": 0.0004711924249719146, "loss": 0.0703, "theoretical_loss": 3.495989439675575, "tokens_seen": 1760559104 }, { "epoch": 0.53, "learning_rate": 0.0004711121810303322, "loss": 0.07, "theoretical_loss": 3.4959453081955205, "tokens_seen": 1760821248 }, { "epoch": 0.53, "learning_rate": 0.0004710319370887498, "loss": 0.0687, "theoretical_loss": 3.495901185124416, "tokens_seen": 1761083392 }, { "epoch": 0.53, "learning_rate": 0.00047095169314716743, "loss": 0.0692, "theoretical_loss": 3.4958570704594067, "tokens_seen": 1761345536 }, { "epoch": 0.53, "learning_rate": 0.000470871449205585, "loss": 0.0685, "theoretical_loss": 3.495812964197641, "tokens_seen": 1761607680 }, { "epoch": 0.53, "learning_rate": 0.0004707912052640026, "loss": 0.0701, "theoretical_loss": 3.4957688663362685, "tokens_seen": 1761869824 }, { "epoch": 0.53, "learning_rate": 0.0004707109613224202, "loss": 0.0717, "theoretical_loss": 3.495724776872439, "tokens_seen": 1762131968 }, { "epoch": 0.53, "learning_rate": 0.0004706307173808377, "loss": 0.071, "theoretical_loss": 3.4956806958033044, "tokens_seen": 1762394112 }, { "epoch": 0.53, "learning_rate": 0.00047055047343925533, "loss": 0.0677, "theoretical_loss": 3.4956366231260185, "tokens_seen": 1762656256 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.0005501354462467134, "objective/train/docs_used": 643830, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2951899766921997, "objective/train/original_loss": 1.2951900959014893, "objective/train/theoretical_loss": 3.4955925588377363, "objective/train/tokens_used": 1783378400, "objective/train/value_avg": -0.005947113037109375, "objective/train/value_loss": 0.0003123681526631117, "objective/train/value_max": -4.57763671875e-05, "objective/train/value_min": -0.490478515625, "objective/train/value_reward_corr": 0.5554375514170562, "objective/train/value_std": 0.01132965087890625, "objective/train/weight_avg": 1.00067937374115, "objective/train/weighted_lm_loss": 1.296012282371521, "objective/train/weights_max": 1.4989490509033203, "objective/train/weights_min": 0.1461181342601776, "theoretical_loss": 3.4955925588377363, "tokens_seen": 1762918400 }, { "epoch": 0.53, "learning_rate": 0.00047047022949767294, "loss": 0.0682, "theoretical_loss": 3.4955925588377363, "tokens_seen": 1762918400 }, { "epoch": 0.53, "learning_rate": 0.00047038998555609055, "loss": 0.0729, "theoretical_loss": 3.4955485029356135, "tokens_seen": 1763180544 }, { "epoch": 0.53, "learning_rate": 0.0004703097416145081, "loss": 0.0694, "theoretical_loss": 3.495504455416807, "tokens_seen": 1763442688 }, { "epoch": 0.53, "learning_rate": 0.0004702294976729257, "loss": 0.0674, "theoretical_loss": 3.495460416278477, "tokens_seen": 1763704832 }, { "epoch": 0.53, "learning_rate": 0.00047014925373134334, "loss": 0.0691, "theoretical_loss": 3.4954163855177827, "tokens_seen": 1763966976 }, { "epoch": 0.53, "learning_rate": 0.00047006900978976084, "loss": 0.0702, "theoretical_loss": 3.495372363131886, "tokens_seen": 1764229120 }, { "epoch": 0.53, "learning_rate": 0.00046998876584817845, "loss": 0.0665, "theoretical_loss": 3.4953283491179503, "tokens_seen": 1764491264 }, { "epoch": 0.53, "learning_rate": 0.00046990852190659607, "loss": 0.0695, "theoretical_loss": 3.4952843434731395, "tokens_seen": 1764753408 }, { "epoch": 0.53, "learning_rate": 0.0004698282779650136, "loss": 0.0698, "theoretical_loss": 3.49524034619462, "tokens_seen": 1765015552 }, { "epoch": 0.53, "learning_rate": 0.00046974803402343124, "loss": 0.0717, "theoretical_loss": 3.495196357279559, "tokens_seen": 1765277696 }, { "epoch": 0.54, "learning_rate": 0.00046966779008184885, "loss": 0.0716, "theoretical_loss": 3.495152376725124, "tokens_seen": 1765539840 }, { "epoch": 0.54, "learning_rate": 0.00046958754614026646, "loss": 0.0662, "theoretical_loss": 3.4951084045284864, "tokens_seen": 1765801984 }, { "epoch": 0.54, "learning_rate": 0.00046950730219868397, "loss": 0.0698, "theoretical_loss": 3.495064440686816, "tokens_seen": 1766064128 }, { "epoch": 0.54, "learning_rate": 0.0004694270582571016, "loss": 0.0699, "theoretical_loss": 3.495020485197287, "tokens_seen": 1766326272 }, { "epoch": 0.54, "learning_rate": 0.0004693468143155192, "loss": 0.0703, "theoretical_loss": 3.494976538057073, "tokens_seen": 1766588416 }, { "epoch": 0.54, "learning_rate": 0.00046926657037393675, "loss": 0.0709, "theoretical_loss": 3.4949325992633486, "tokens_seen": 1766850560 }, { "epoch": 0.54, "learning_rate": 0.00046918632643235436, "loss": 0.0706, "theoretical_loss": 3.4948886688132923, "tokens_seen": 1767112704 }, { "epoch": 0.54, "learning_rate": 0.000469106082490772, "loss": 0.0701, "theoretical_loss": 3.4948447467040804, "tokens_seen": 1767374848 }, { "epoch": 0.54, "learning_rate": 0.0004690258385491896, "loss": 0.0694, "theoretical_loss": 3.494800832932894, "tokens_seen": 1767636992 }, { "epoch": 0.54, "learning_rate": 0.00046894559460760715, "loss": 0.0662, "theoretical_loss": 3.494756927496913, "tokens_seen": 1767899136 }, { "epoch": 0.54, "learning_rate": 0.0004688653506660247, "loss": 0.0684, "theoretical_loss": 3.49471303039332, "tokens_seen": 1768161280 }, { "epoch": 0.54, "learning_rate": 0.0004687851067244423, "loss": 0.0707, "theoretical_loss": 3.4946691416192985, "tokens_seen": 1768423424 }, { "epoch": 0.54, "learning_rate": 0.0004687048627828599, "loss": 0.069, "theoretical_loss": 3.4946252611720348, "tokens_seen": 1768685568 }, { "epoch": 0.54, "learning_rate": 0.0004686246188412775, "loss": 0.0706, "theoretical_loss": 3.494581389048714, "tokens_seen": 1768947712 }, { "epoch": 0.54, "learning_rate": 0.0004685443748996951, "loss": 0.0695, "theoretical_loss": 3.494537525246524, "tokens_seen": 1769209856 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.0003884284815285355, "objective/train/docs_used": 646001, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2914732694625854, "objective/train/original_loss": 1.2914732694625854, "objective/train/theoretical_loss": 3.4944936697626545, "objective/train/tokens_used": 1789932000, "objective/train/value_avg": -0.00746917724609375, "objective/train/value_loss": 0.0005339648341760039, "objective/train/value_max": -4.035234451293945e-05, "objective/train/value_min": -0.84521484375, "objective/train/value_reward_corr": 0.646307946059838, "objective/train/value_std": 0.016021728515625, "objective/train/weight_avg": 1.0005794763565063, "objective/train/weighted_lm_loss": 1.2920138835906982, "objective/train/weights_max": 2.0619101524353027, "objective/train/weights_min": 0.03041936829686165, "theoretical_loss": 3.4944936697626545, "tokens_seen": 1769472000 }, { "epoch": 0.54, "learning_rate": 0.0004684641309581127, "loss": 0.0655, "theoretical_loss": 3.4944936697626545, "tokens_seen": 1769472000 }, { "epoch": 0.54, "learning_rate": 0.00046838388701653027, "loss": 0.068, "theoretical_loss": 3.4944498225942953, "tokens_seen": 1769734144 }, { "epoch": 0.54, "learning_rate": 0.00046830364307494783, "loss": 0.0698, "theoretical_loss": 3.4944059837386394, "tokens_seen": 1769996288 }, { "epoch": 0.54, "learning_rate": 0.00046822339913336544, "loss": 0.0712, "theoretical_loss": 3.494362153192879, "tokens_seen": 1770258432 }, { "epoch": 0.54, "learning_rate": 0.000468143155191783, "loss": 0.0698, "theoretical_loss": 3.494318330954209, "tokens_seen": 1770520576 }, { "epoch": 0.54, "learning_rate": 0.0004680629112502006, "loss": 0.069, "theoretical_loss": 3.494274517019826, "tokens_seen": 1770782720 }, { "epoch": 0.54, "learning_rate": 0.0004679826673086182, "loss": 0.0701, "theoretical_loss": 3.494230711386926, "tokens_seen": 1771044864 }, { "epoch": 0.54, "learning_rate": 0.0004679024233670358, "loss": 0.0682, "theoretical_loss": 3.4941869140527095, "tokens_seen": 1771307008 }, { "epoch": 0.54, "learning_rate": 0.0004678221794254534, "loss": 0.0688, "theoretical_loss": 3.494143125014375, "tokens_seen": 1771569152 }, { "epoch": 0.54, "learning_rate": 0.00046774193548387096, "loss": 0.0715, "theoretical_loss": 3.4940993442691246, "tokens_seen": 1771831296 }, { "epoch": 0.54, "learning_rate": 0.00046766169154228857, "loss": 0.07, "theoretical_loss": 3.4940555718141613, "tokens_seen": 1772093440 }, { "epoch": 0.54, "learning_rate": 0.00046758144760070613, "loss": 0.0692, "theoretical_loss": 3.4940118076466886, "tokens_seen": 1772355584 }, { "epoch": 0.54, "learning_rate": 0.00046750120365912374, "loss": 0.0689, "theoretical_loss": 3.493968051763912, "tokens_seen": 1772617728 }, { "epoch": 0.54, "learning_rate": 0.00046742095971754135, "loss": 0.0693, "theoretical_loss": 3.4939243041630395, "tokens_seen": 1772879872 }, { "epoch": 0.54, "learning_rate": 0.0004673407157759589, "loss": 0.0699, "theoretical_loss": 3.4938805648412776, "tokens_seen": 1773142016 }, { "epoch": 0.54, "learning_rate": 0.0004672604718343765, "loss": 0.07, "theoretical_loss": 3.4938368337958368, "tokens_seen": 1773404160 }, { "epoch": 0.54, "learning_rate": 0.00046718022789279414, "loss": 0.0677, "theoretical_loss": 3.493793111023928, "tokens_seen": 1773666304 }, { "epoch": 0.54, "learning_rate": 0.0004670999839512117, "loss": 0.0677, "theoretical_loss": 3.4937493965227633, "tokens_seen": 1773928448 }, { "epoch": 0.54, "learning_rate": 0.00046701974000962925, "loss": 0.0666, "theoretical_loss": 3.4937056902895565, "tokens_seen": 1774190592 }, { "epoch": 0.54, "learning_rate": 0.00046693949606804687, "loss": 0.0696, "theoretical_loss": 3.4936619923215226, "tokens_seen": 1774452736 }, { "epoch": 0.54, "learning_rate": 0.0004668592521264645, "loss": 0.0661, "theoretical_loss": 3.493618302615878, "tokens_seen": 1774714880 }, { "epoch": 0.54, "learning_rate": 0.00046677900818488204, "loss": 0.0687, "theoretical_loss": 3.4935746211698393, "tokens_seen": 1774977024 }, { "epoch": 0.54, "learning_rate": 0.00046669876424329965, "loss": 0.0683, "theoretical_loss": 3.4935309479806262, "tokens_seen": 1775239168 }, { "epoch": 0.54, "learning_rate": 0.00046661852030171726, "loss": 0.0716, "theoretical_loss": 3.49348728304546, "tokens_seen": 1775501312 }, { "epoch": 0.54, "learning_rate": 0.0004665382763601348, "loss": 0.0663, "theoretical_loss": 3.493443626361561, "tokens_seen": 1775763456 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.0006577958702109754, "objective/train/docs_used": 648417, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4378572702407837, "objective/train/original_loss": 1.4378571510314941, "objective/train/theoretical_loss": 3.4933999779261526, "objective/train/tokens_used": 1796485600, "objective/train/value_avg": -0.005893707275390625, "objective/train/value_loss": 0.00018775436910800636, "objective/train/value_max": -4.792213439941406e-05, "objective/train/value_min": -0.96630859375, "objective/train/value_reward_corr": 0.6805375710021935, "objective/train/value_std": 0.01258087158203125, "objective/train/weight_avg": 1.000742793083191, "objective/train/weighted_lm_loss": 1.4388798475265503, "objective/train/weights_max": 1.6557923555374146, "objective/train/weights_min": 0.368775874376297, "theoretical_loss": 3.4933999779261526, "tokens_seen": 1776025600 }, { "epoch": 0.54, "learning_rate": 0.0004664580324185524, "loss": 0.0693, "theoretical_loss": 3.4933999779261526, "tokens_seen": 1776025600 }, { "epoch": 0.54, "learning_rate": 0.00046637778847697, "loss": 0.0691, "theoretical_loss": 3.4933563377364596, "tokens_seen": 1776287744 }, { "epoch": 0.54, "learning_rate": 0.0004662975445353876, "loss": 0.068, "theoretical_loss": 3.493312705789708, "tokens_seen": 1776549888 }, { "epoch": 0.54, "learning_rate": 0.00046621730059380516, "loss": 0.07, "theoretical_loss": 3.493269082083123, "tokens_seen": 1776812032 }, { "epoch": 0.54, "learning_rate": 0.0004661370566522228, "loss": 0.0708, "theoretical_loss": 3.4932254666139357, "tokens_seen": 1777074176 }, { "epoch": 0.54, "learning_rate": 0.0004660568127106404, "loss": 0.0648, "theoretical_loss": 3.493181859379374, "tokens_seen": 1777336320 }, { "epoch": 0.54, "learning_rate": 0.000465976568769058, "loss": 0.0684, "theoretical_loss": 3.4931382603766696, "tokens_seen": 1777598464 }, { "epoch": 0.54, "learning_rate": 0.0004658963248274755, "loss": 0.0661, "theoretical_loss": 3.493094669603055, "tokens_seen": 1777860608 }, { "epoch": 0.54, "learning_rate": 0.0004658160808858931, "loss": 0.0686, "theoretical_loss": 3.493051087055764, "tokens_seen": 1778122752 }, { "epoch": 0.54, "learning_rate": 0.00046573583694431073, "loss": 0.071, "theoretical_loss": 3.493007512732031, "tokens_seen": 1778384896 }, { "epoch": 0.54, "learning_rate": 0.0004656555930027283, "loss": 0.0697, "theoretical_loss": 3.4929639466290934, "tokens_seen": 1778647040 }, { "epoch": 0.54, "learning_rate": 0.0004655753490611459, "loss": 0.0681, "theoretical_loss": 3.492920388744188, "tokens_seen": 1778909184 }, { "epoch": 0.54, "learning_rate": 0.0004654951051195635, "loss": 0.0694, "theoretical_loss": 3.4928768390745555, "tokens_seen": 1779171328 }, { "epoch": 0.54, "learning_rate": 0.00046541486117798107, "loss": 0.0688, "theoretical_loss": 3.4928332976174348, "tokens_seen": 1779433472 }, { "epoch": 0.54, "learning_rate": 0.00046533461723639863, "loss": 0.068, "theoretical_loss": 3.492789764370068, "tokens_seen": 1779695616 }, { "epoch": 0.54, "learning_rate": 0.00046525437329481624, "loss": 0.0694, "theoretical_loss": 3.4927462393296986, "tokens_seen": 1779957760 }, { "epoch": 0.54, "learning_rate": 0.00046517412935323386, "loss": 0.0721, "theoretical_loss": 3.492702722493571, "tokens_seen": 1780219904 }, { "epoch": 0.54, "learning_rate": 0.0004650938854116514, "loss": 0.0672, "theoretical_loss": 3.4926592138589307, "tokens_seen": 1780482048 }, { "epoch": 0.54, "learning_rate": 0.000465013641470069, "loss": 0.0673, "theoretical_loss": 3.4926157134230253, "tokens_seen": 1780744192 }, { "epoch": 0.54, "learning_rate": 0.00046493339752848664, "loss": 0.0699, "theoretical_loss": 3.4925722211831025, "tokens_seen": 1781006336 }, { "epoch": 0.54, "learning_rate": 0.0004648531535869042, "loss": 0.0688, "theoretical_loss": 3.4925287371364124, "tokens_seen": 1781268480 }, { "epoch": 0.54, "learning_rate": 0.00046477290964532176, "loss": 0.0661, "theoretical_loss": 3.4924852612802066, "tokens_seen": 1781530624 }, { "epoch": 0.54, "learning_rate": 0.00046469266570373937, "loss": 0.0689, "theoretical_loss": 3.4924417936117376, "tokens_seen": 1781792768 }, { "epoch": 0.54, "learning_rate": 0.000464612421762157, "loss": 0.0705, "theoretical_loss": 3.492398334128258, "tokens_seen": 1782054912 }, { "epoch": 0.54, "learning_rate": 0.00046453217782057454, "loss": 0.0664, "theoretical_loss": 3.492354882827023, "tokens_seen": 1782317056 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.0009097594302147627, "objective/train/docs_used": 650724, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4593218564987183, "objective/train/original_loss": 1.4593218564987183, "objective/train/theoretical_loss": 3.4923114397052903, "objective/train/tokens_used": 1803039200, "objective/train/value_avg": -0.0082244873046875, "objective/train/value_loss": 0.00018493611423764378, "objective/train/value_max": -8.350610733032227e-05, "objective/train/value_min": -0.359375, "objective/train/value_reward_corr": 0.6688092284650106, "objective/train/value_std": 0.01255035400390625, "objective/train/weight_avg": 1.0009934902191162, "objective/train/weighted_lm_loss": 1.4602468013763428, "objective/train/weights_max": 1.2040235996246338, "objective/train/weights_min": 0.3700779974460602, "theoretical_loss": 3.4923114397052903, "tokens_seen": 1782579200 }, { "epoch": 0.54, "learning_rate": 0.00046445193387899215, "loss": 0.0709, "theoretical_loss": 3.4923114397052903, "tokens_seen": 1782579200 }, { "epoch": 0.54, "learning_rate": 0.00046437168993740976, "loss": 0.0694, "theoretical_loss": 3.4922680047603167, "tokens_seen": 1782841344 }, { "epoch": 0.54, "learning_rate": 0.0004642914459958273, "loss": 0.0719, "theoretical_loss": 3.4922245779893615, "tokens_seen": 1783103488 }, { "epoch": 0.54, "learning_rate": 0.00046421120205424494, "loss": 0.0726, "theoretical_loss": 3.492181159389685, "tokens_seen": 1783365632 }, { "epoch": 0.54, "learning_rate": 0.0004641309581126625, "loss": 0.0702, "theoretical_loss": 3.4921377489585486, "tokens_seen": 1783627776 }, { "epoch": 0.54, "learning_rate": 0.00046405071417108005, "loss": 0.0692, "theoretical_loss": 3.4920943466932153, "tokens_seen": 1783889920 }, { "epoch": 0.54, "learning_rate": 0.00046397047022949767, "loss": 0.068, "theoretical_loss": 3.49205095259095, "tokens_seen": 1784152064 }, { "epoch": 0.54, "learning_rate": 0.0004638902262879153, "loss": 0.0685, "theoretical_loss": 3.492007566649018, "tokens_seen": 1784414208 }, { "epoch": 0.54, "learning_rate": 0.0004638099823463329, "loss": 0.0681, "theoretical_loss": 3.491964188864686, "tokens_seen": 1784676352 }, { "epoch": 0.54, "learning_rate": 0.00046372973840475045, "loss": 0.0728, "theoretical_loss": 3.491920819235223, "tokens_seen": 1784938496 }, { "epoch": 0.54, "learning_rate": 0.00046364949446316806, "loss": 0.0719, "theoretical_loss": 3.491877457757898, "tokens_seen": 1785200640 }, { "epoch": 0.54, "learning_rate": 0.0004635692505215856, "loss": 0.0681, "theoretical_loss": 3.491834104429982, "tokens_seen": 1785462784 }, { "epoch": 0.54, "learning_rate": 0.0004634890065800032, "loss": 0.0672, "theoretical_loss": 3.491790759248747, "tokens_seen": 1785724928 }, { "epoch": 0.54, "learning_rate": 0.0004634087626384208, "loss": 0.0707, "theoretical_loss": 3.491747422211467, "tokens_seen": 1785987072 }, { "epoch": 0.54, "learning_rate": 0.0004633285186968384, "loss": 0.068, "theoretical_loss": 3.491704093315416, "tokens_seen": 1786249216 }, { "epoch": 0.54, "learning_rate": 0.000463248274755256, "loss": 0.0692, "theoretical_loss": 3.4916607725578714, "tokens_seen": 1786511360 }, { "epoch": 0.54, "learning_rate": 0.0004631680308136736, "loss": 0.0688, "theoretical_loss": 3.4916174599361103, "tokens_seen": 1786773504 }, { "epoch": 0.54, "learning_rate": 0.0004630877868720912, "loss": 0.0724, "theoretical_loss": 3.491574155447411, "tokens_seen": 1787035648 }, { "epoch": 0.54, "learning_rate": 0.00046300754293050875, "loss": 0.0685, "theoretical_loss": 3.491530859089054, "tokens_seen": 1787297792 }, { "epoch": 0.54, "learning_rate": 0.0004629272989889263, "loss": 0.0669, "theoretical_loss": 3.49148757085832, "tokens_seen": 1787559936 }, { "epoch": 0.54, "learning_rate": 0.0004628470550473439, "loss": 0.0723, "theoretical_loss": 3.4914442907524927, "tokens_seen": 1787822080 }, { "epoch": 0.54, "learning_rate": 0.00046276681110576153, "loss": 0.0673, "theoretical_loss": 3.4914010187688556, "tokens_seen": 1788084224 }, { "epoch": 0.54, "learning_rate": 0.00046268656716417914, "loss": 0.0692, "theoretical_loss": 3.4913577549046937, "tokens_seen": 1788346368 }, { "epoch": 0.54, "learning_rate": 0.0004626063232225967, "loss": 0.0672, "theoretical_loss": 3.491314499157294, "tokens_seen": 1788608512 }, { "epoch": 0.54, "learning_rate": 0.0004625260792810143, "loss": 0.0682, "theoretical_loss": 3.491271251523945, "tokens_seen": 1788870656 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.0006975163705646992, "objective/train/docs_used": 653179, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.354465126991272, "objective/train/original_loss": 1.3544648885726929, "objective/train/theoretical_loss": 3.491228012001935, "objective/train/tokens_used": 1809592800, "objective/train/value_avg": -0.007549285888671875, "objective/train/value_loss": 0.00028828682843595743, "objective/train/value_max": -5.4776668548583984e-05, "objective/train/value_min": -0.7099609375, "objective/train/value_reward_corr": 0.613868199622637, "objective/train/value_std": 0.012359619140625, "objective/train/weight_avg": 1.0008219480514526, "objective/train/weighted_lm_loss": 1.3553131818771362, "objective/train/weights_max": 1.442611575126648, "objective/train/weights_min": 0.36832353472709656, "theoretical_loss": 3.491228012001935, "tokens_seen": 1789132800 }, { "epoch": 0.54, "learning_rate": 0.0004624458353394319, "loss": 0.0687, "theoretical_loss": 3.491228012001935, "tokens_seen": 1789132800 }, { "epoch": 0.54, "learning_rate": 0.00046236559139784943, "loss": 0.0684, "theoretical_loss": 3.4911847805885547, "tokens_seen": 1789394944 }, { "epoch": 0.54, "learning_rate": 0.00046228534745626704, "loss": 0.0684, "theoretical_loss": 3.491141557281096, "tokens_seen": 1789657088 }, { "epoch": 0.54, "learning_rate": 0.00046220510351468465, "loss": 0.0728, "theoretical_loss": 3.491098342076852, "tokens_seen": 1789919232 }, { "epoch": 0.54, "learning_rate": 0.00046212485957310227, "loss": 0.0681, "theoretical_loss": 3.4910551349731183, "tokens_seen": 1790181376 }, { "epoch": 0.54, "learning_rate": 0.0004620446156315198, "loss": 0.0677, "theoretical_loss": 3.4910119359671885, "tokens_seen": 1790443520 }, { "epoch": 0.54, "learning_rate": 0.00046196437168993744, "loss": 0.0655, "theoretical_loss": 3.490968745056361, "tokens_seen": 1790705664 }, { "epoch": 0.54, "learning_rate": 0.00046188412774835505, "loss": 0.0686, "theoretical_loss": 3.4909255622379343, "tokens_seen": 1790967808 }, { "epoch": 0.54, "learning_rate": 0.00046180388380677256, "loss": 0.0686, "theoretical_loss": 3.490882387509207, "tokens_seen": 1791229952 }, { "epoch": 0.54, "learning_rate": 0.00046172363986519017, "loss": 0.0702, "theoretical_loss": 3.490839220867481, "tokens_seen": 1791492096 }, { "epoch": 0.54, "learning_rate": 0.0004616433959236078, "loss": 0.0659, "theoretical_loss": 3.490796062310058, "tokens_seen": 1791754240 }, { "epoch": 0.54, "learning_rate": 0.00046156315198202534, "loss": 0.0683, "theoretical_loss": 3.4907529118342415, "tokens_seen": 1792016384 }, { "epoch": 0.54, "learning_rate": 0.00046148290804044295, "loss": 0.0677, "theoretical_loss": 3.490709769437337, "tokens_seen": 1792278528 }, { "epoch": 0.54, "learning_rate": 0.00046140266409886056, "loss": 0.0654, "theoretical_loss": 3.49066663511665, "tokens_seen": 1792540672 }, { "epoch": 0.54, "learning_rate": 0.0004613224201572782, "loss": 0.0686, "theoretical_loss": 3.4906235088694872, "tokens_seen": 1792802816 }, { "epoch": 0.54, "learning_rate": 0.0004612421762156957, "loss": 0.0678, "theoretical_loss": 3.4905803906931587, "tokens_seen": 1793064960 }, { "epoch": 0.54, "learning_rate": 0.0004611619322741133, "loss": 0.0653, "theoretical_loss": 3.4905372805849737, "tokens_seen": 1793327104 }, { "epoch": 0.54, "learning_rate": 0.0004610816883325309, "loss": 0.0658, "theoretical_loss": 3.490494178542243, "tokens_seen": 1793589248 }, { "epoch": 0.54, "learning_rate": 0.00046100144439094846, "loss": 0.0662, "theoretical_loss": 3.4904510845622805, "tokens_seen": 1793851392 }, { "epoch": 0.54, "learning_rate": 0.0004609212004493661, "loss": 0.0691, "theoretical_loss": 3.490407998642399, "tokens_seen": 1794113536 }, { "epoch": 0.54, "learning_rate": 0.0004608409565077837, "loss": 0.0693, "theoretical_loss": 3.4903649207799137, "tokens_seen": 1794375680 }, { "epoch": 0.54, "learning_rate": 0.0004607607125662013, "loss": 0.0691, "theoretical_loss": 3.4903218509721414, "tokens_seen": 1794637824 }, { "epoch": 0.54, "learning_rate": 0.00046068046862461886, "loss": 0.0675, "theoretical_loss": 3.4902787892163993, "tokens_seen": 1794899968 }, { "epoch": 0.54, "learning_rate": 0.0004606002246830364, "loss": 0.0683, "theoretical_loss": 3.490235735510007, "tokens_seen": 1795162112 }, { "epoch": 0.54, "learning_rate": 0.00046051998074145403, "loss": 0.0701, "theoretical_loss": 3.490192689850284, "tokens_seen": 1795424256 }, { "epoch": 0.54, "objective/train/advantage_avg": -0.0002183630713261664, "objective/train/docs_used": 655419, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.333343505859375, "objective/train/original_loss": 1.333343505859375, "objective/train/theoretical_loss": 3.4901496522345528, "objective/train/tokens_used": 1816146400, "objective/train/value_avg": -0.00982666015625, "objective/train/value_loss": 0.0002944105945061892, "objective/train/value_max": -4.1961669921875e-05, "objective/train/value_min": -0.771484375, "objective/train/value_reward_corr": 0.7564874172701911, "objective/train/value_std": 0.0181732177734375, "objective/train/weight_avg": 0.9999178647994995, "objective/train/weighted_lm_loss": 1.3324216604232788, "objective/train/weights_max": 1.3296234607696533, "objective/train/weights_min": 0.3700949549674988, "theoretical_loss": 3.4901496522345528, "tokens_seen": 1795686400 }, { "epoch": 0.54, "learning_rate": 0.0004604397367998716, "loss": 0.0698, "theoretical_loss": 3.4901496522345528, "tokens_seen": 1795686400 }, { "epoch": 0.54, "learning_rate": 0.0004603594928582892, "loss": 0.0687, "theoretical_loss": 3.490106622660136, "tokens_seen": 1795948544 }, { "epoch": 0.54, "learning_rate": 0.0004602792489167068, "loss": 0.0695, "theoretical_loss": 3.4900636011243567, "tokens_seen": 1796210688 }, { "epoch": 0.54, "learning_rate": 0.00046019900497512443, "loss": 0.0704, "theoretical_loss": 3.4900205876245414, "tokens_seen": 1796472832 }, { "epoch": 0.54, "learning_rate": 0.000460118761033542, "loss": 0.0706, "theoretical_loss": 3.4899775821580166, "tokens_seen": 1796734976 }, { "epoch": 0.54, "learning_rate": 0.00046003851709195954, "loss": 0.0689, "theoretical_loss": 3.4899345847221097, "tokens_seen": 1796997120 }, { "epoch": 0.54, "learning_rate": 0.00045995827315037716, "loss": 0.0665, "theoretical_loss": 3.4898915953141505, "tokens_seen": 1797259264 }, { "epoch": 0.54, "learning_rate": 0.0004598780292087947, "loss": 0.0678, "theoretical_loss": 3.4898486139314695, "tokens_seen": 1797521408 }, { "epoch": 0.54, "learning_rate": 0.00045979778526721233, "loss": 0.0699, "theoretical_loss": 3.489805640571398, "tokens_seen": 1797783552 }, { "epoch": 0.54, "learning_rate": 0.00045971754132562994, "loss": 0.0664, "theoretical_loss": 3.48976267523127, "tokens_seen": 1798045696 }, { "epoch": 0.54, "learning_rate": 0.0004596372973840475, "loss": 0.0679, "theoretical_loss": 3.4897197179084185, "tokens_seen": 1798307840 }, { "epoch": 0.55, "learning_rate": 0.0004595570534424651, "loss": 0.07, "theoretical_loss": 3.48967676860018, "tokens_seen": 1798569984 }, { "epoch": 0.55, "learning_rate": 0.0004594768095008827, "loss": 0.0704, "theoretical_loss": 3.4896338273038916, "tokens_seen": 1798832128 }, { "epoch": 0.55, "learning_rate": 0.0004593965655593003, "loss": 0.0665, "theoretical_loss": 3.4895908940168905, "tokens_seen": 1799094272 }, { "epoch": 0.55, "learning_rate": 0.00045931632161771784, "loss": 0.07, "theoretical_loss": 3.489547968736517, "tokens_seen": 1799356416 }, { "epoch": 0.55, "learning_rate": 0.00045923607767613545, "loss": 0.0702, "theoretical_loss": 3.4895050514601116, "tokens_seen": 1799618560 }, { "epoch": 0.55, "learning_rate": 0.00045915583373455307, "loss": 0.0686, "theoretical_loss": 3.4894621421850163, "tokens_seen": 1799880704 }, { "epoch": 0.55, "learning_rate": 0.0004590755897929706, "loss": 0.069, "theoretical_loss": 3.489419240908574, "tokens_seen": 1800142848 }, { "epoch": 0.55, "learning_rate": 0.00045899534585138824, "loss": 0.0706, "theoretical_loss": 3.48937634762813, "tokens_seen": 1800404992 }, { "epoch": 0.55, "learning_rate": 0.00045891510190980585, "loss": 0.0673, "theoretical_loss": 3.489333462341029, "tokens_seen": 1800667136 }, { "epoch": 0.55, "learning_rate": 0.0004588348579682234, "loss": 0.0686, "theoretical_loss": 3.4892905850446185, "tokens_seen": 1800929280 }, { "epoch": 0.55, "learning_rate": 0.00045875461402664097, "loss": 0.0674, "theoretical_loss": 3.489247715736247, "tokens_seen": 1801191424 }, { "epoch": 0.55, "learning_rate": 0.0004586743700850586, "loss": 0.0695, "theoretical_loss": 3.489204854413264, "tokens_seen": 1801453568 }, { "epoch": 0.55, "learning_rate": 0.0004585941261434762, "loss": 0.0662, "theoretical_loss": 3.48916200107302, "tokens_seen": 1801715712 }, { "epoch": 0.55, "learning_rate": 0.00045851388220189375, "loss": 0.0668, "theoretical_loss": 3.489119155712868, "tokens_seen": 1801977856 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.0006118986057117581, "objective/train/docs_used": 657910, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.438297986984253, "objective/train/original_loss": 1.4382978677749634, "objective/train/theoretical_loss": 3.4890763183301607, "objective/train/tokens_used": 1822700000, "objective/train/value_avg": -0.00920867919921875, "objective/train/value_loss": 0.00051107257604599, "objective/train/value_max": -4.9114227294921875e-05, "objective/train/value_min": -0.7216796875, "objective/train/value_reward_corr": 0.6640711326255471, "objective/train/value_std": 0.019927978515625, "objective/train/weight_avg": 1.0008348226547241, "objective/train/weighted_lm_loss": 1.4394363164901733, "objective/train/weights_max": 2.024425983428955, "objective/train/weights_min": 0.37084680795669556, "theoretical_loss": 3.4890763183301607, "tokens_seen": 1802240000 }, { "epoch": 0.55, "learning_rate": 0.00045843363826031136, "loss": 0.0682, "theoretical_loss": 3.4890763183301607, "tokens_seen": 1802240000 }, { "epoch": 0.55, "learning_rate": 0.000458353394318729, "loss": 0.0694, "theoretical_loss": 3.489033488922253, "tokens_seen": 1802502144 }, { "epoch": 0.55, "learning_rate": 0.00045827315037714653, "loss": 0.0672, "theoretical_loss": 3.4889906674865, "tokens_seen": 1802764288 }, { "epoch": 0.55, "learning_rate": 0.0004581929064355641, "loss": 0.0719, "theoretical_loss": 3.48894785402026, "tokens_seen": 1803026432 }, { "epoch": 0.55, "learning_rate": 0.0004581126624939817, "loss": 0.0676, "theoretical_loss": 3.4889050485208912, "tokens_seen": 1803288576 }, { "epoch": 0.55, "learning_rate": 0.0004580324185523993, "loss": 0.0682, "theoretical_loss": 3.4888622509857523, "tokens_seen": 1803550720 }, { "epoch": 0.55, "learning_rate": 0.0004579521746108169, "loss": 0.0653, "theoretical_loss": 3.488819461412205, "tokens_seen": 1803812864 }, { "epoch": 0.55, "learning_rate": 0.0004578719306692345, "loss": 0.0706, "theoretical_loss": 3.4887766797976116, "tokens_seen": 1804075008 }, { "epoch": 0.55, "learning_rate": 0.0004577916867276521, "loss": 0.0705, "theoretical_loss": 3.488733906139336, "tokens_seen": 1804337152 }, { "epoch": 0.55, "learning_rate": 0.0004577114427860697, "loss": 0.0709, "theoretical_loss": 3.4886911404347414, "tokens_seen": 1804599296 }, { "epoch": 0.55, "learning_rate": 0.0004576311988444872, "loss": 0.0675, "theoretical_loss": 3.4886483826811947, "tokens_seen": 1804861440 }, { "epoch": 0.55, "learning_rate": 0.00045755095490290483, "loss": 0.0675, "theoretical_loss": 3.4886056328760633, "tokens_seen": 1805123584 }, { "epoch": 0.55, "learning_rate": 0.00045747071096132244, "loss": 0.0697, "theoretical_loss": 3.4885628910167155, "tokens_seen": 1805385728 }, { "epoch": 0.55, "learning_rate": 0.00045739046701974, "loss": 0.0703, "theoretical_loss": 3.488520157100521, "tokens_seen": 1805647872 }, { "epoch": 0.55, "learning_rate": 0.0004573102230781576, "loss": 0.0703, "theoretical_loss": 3.4884774311248505, "tokens_seen": 1805910016 }, { "epoch": 0.55, "learning_rate": 0.00045722997913657523, "loss": 0.0676, "theoretical_loss": 3.4884347130870768, "tokens_seen": 1806172160 }, { "epoch": 0.55, "learning_rate": 0.0004571497351949928, "loss": 0.0662, "theoretical_loss": 3.4883920029845727, "tokens_seen": 1806434304 }, { "epoch": 0.55, "learning_rate": 0.00045706949125341034, "loss": 0.0691, "theoretical_loss": 3.488349300814713, "tokens_seen": 1806696448 }, { "epoch": 0.55, "learning_rate": 0.00045698924731182796, "loss": 0.0691, "theoretical_loss": 3.4883066065748745, "tokens_seen": 1806958592 }, { "epoch": 0.55, "learning_rate": 0.00045690900337024557, "loss": 0.0706, "theoretical_loss": 3.488263920262434, "tokens_seen": 1807220736 }, { "epoch": 0.55, "learning_rate": 0.00045682875942866313, "loss": 0.0687, "theoretical_loss": 3.4882212418747693, "tokens_seen": 1807482880 }, { "epoch": 0.55, "learning_rate": 0.00045674851548708074, "loss": 0.0686, "theoretical_loss": 3.4881785714092617, "tokens_seen": 1807745024 }, { "epoch": 0.55, "learning_rate": 0.00045666827154549835, "loss": 0.0704, "theoretical_loss": 3.48813590886329, "tokens_seen": 1808007168 }, { "epoch": 0.55, "learning_rate": 0.0004565880276039159, "loss": 0.0687, "theoretical_loss": 3.488093254234238, "tokens_seen": 1808269312 }, { "epoch": 0.55, "learning_rate": 0.00045650778366233347, "loss": 0.0682, "theoretical_loss": 3.488050607519489, "tokens_seen": 1808531456 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.0004177616792730987, "objective/train/docs_used": 660069, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3710644245147705, "objective/train/original_loss": 1.37106454372406, "objective/train/theoretical_loss": 3.4880079687164267, "objective/train/tokens_used": 1829253600, "objective/train/value_avg": -0.0078277587890625, "objective/train/value_loss": 0.0003445453767199069, "objective/train/value_max": -6.920099258422852e-05, "objective/train/value_min": -0.86474609375, "objective/train/value_reward_corr": 0.692850757955269, "objective/train/value_std": 0.01751708984375, "objective/train/weight_avg": 1.000558853149414, "objective/train/weighted_lm_loss": 1.3716151714324951, "objective/train/weights_max": 1.3765496015548706, "objective/train/weights_min": 0.2556014955043793, "theoretical_loss": 3.4880079687164267, "tokens_seen": 1808793600 }, { "epoch": 0.55, "learning_rate": 0.0004564275397207511, "loss": 0.07, "theoretical_loss": 3.4880079687164267, "tokens_seen": 1808793600 }, { "epoch": 0.55, "learning_rate": 0.0004563472957791687, "loss": 0.0677, "theoretical_loss": 3.4879653378224384, "tokens_seen": 1809055744 }, { "epoch": 0.55, "learning_rate": 0.00045626705183758625, "loss": 0.0696, "theoretical_loss": 3.4879227148349106, "tokens_seen": 1809317888 }, { "epoch": 0.55, "learning_rate": 0.00045618680789600387, "loss": 0.0664, "theoretical_loss": 3.487880099751232, "tokens_seen": 1809580032 }, { "epoch": 0.55, "learning_rate": 0.0004561065639544215, "loss": 0.0645, "theoretical_loss": 3.487837492568792, "tokens_seen": 1809842176 }, { "epoch": 0.55, "learning_rate": 0.00045602632001283904, "loss": 0.0697, "theoretical_loss": 3.487794893284981, "tokens_seen": 1810104320 }, { "epoch": 0.55, "learning_rate": 0.00045594607607125665, "loss": 0.0684, "theoretical_loss": 3.487752301897192, "tokens_seen": 1810366464 }, { "epoch": 0.55, "learning_rate": 0.0004558658321296742, "loss": 0.0701, "theoretical_loss": 3.487709718402818, "tokens_seen": 1810628608 }, { "epoch": 0.55, "learning_rate": 0.00045578558818809177, "loss": 0.0676, "theoretical_loss": 3.4876671427992543, "tokens_seen": 1810890752 }, { "epoch": 0.55, "learning_rate": 0.0004557053442465094, "loss": 0.0671, "theoretical_loss": 3.4876245750838955, "tokens_seen": 1811152896 }, { "epoch": 0.55, "learning_rate": 0.000455625100304927, "loss": 0.0693, "theoretical_loss": 3.48758201525414, "tokens_seen": 1811415040 }, { "epoch": 0.55, "learning_rate": 0.0004555448563633446, "loss": 0.0669, "theoretical_loss": 3.487539463307385, "tokens_seen": 1811677184 }, { "epoch": 0.55, "learning_rate": 0.00045546461242176216, "loss": 0.0679, "theoretical_loss": 3.4874969192410306, "tokens_seen": 1811939328 }, { "epoch": 0.55, "learning_rate": 0.0004553843684801798, "loss": 0.0674, "theoretical_loss": 3.4874543830524782, "tokens_seen": 1812201472 }, { "epoch": 0.55, "learning_rate": 0.00045530412453859733, "loss": 0.0693, "theoretical_loss": 3.487411854739128, "tokens_seen": 1812463616 }, { "epoch": 0.55, "learning_rate": 0.0004552238805970149, "loss": 0.0691, "theoretical_loss": 3.487369334298386, "tokens_seen": 1812725760 }, { "epoch": 0.55, "learning_rate": 0.0004551436366554325, "loss": 0.0673, "theoretical_loss": 3.4873268217276543, "tokens_seen": 1812987904 }, { "epoch": 0.55, "learning_rate": 0.0004550633927138501, "loss": 0.0696, "theoretical_loss": 3.48728431702434, "tokens_seen": 1813250048 }, { "epoch": 0.55, "learning_rate": 0.00045498314877226773, "loss": 0.0672, "theoretical_loss": 3.4872418201858495, "tokens_seen": 1813512192 }, { "epoch": 0.55, "learning_rate": 0.0004549029048306853, "loss": 0.069, "theoretical_loss": 3.487199331209591, "tokens_seen": 1813774336 }, { "epoch": 0.55, "learning_rate": 0.0004548226608891029, "loss": 0.0707, "theoretical_loss": 3.487156850092974, "tokens_seen": 1814036480 }, { "epoch": 0.55, "learning_rate": 0.0004547424169475205, "loss": 0.0717, "theoretical_loss": 3.487114376833409, "tokens_seen": 1814298624 }, { "epoch": 0.55, "learning_rate": 0.000454662173005938, "loss": 0.0678, "theoretical_loss": 3.487071911428308, "tokens_seen": 1814560768 }, { "epoch": 0.55, "learning_rate": 0.00045458192906435563, "loss": 0.0659, "theoretical_loss": 3.487029453875085, "tokens_seen": 1814822912 }, { "epoch": 0.55, "learning_rate": 0.00045450168512277324, "loss": 0.0706, "theoretical_loss": 3.4869870041711524, "tokens_seen": 1815085056 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.00177646370138973, "objective/train/docs_used": 662349, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.329512119293213, "objective/train/original_loss": 1.329512119293213, "objective/train/theoretical_loss": 3.4869445623139272, "objective/train/tokens_used": 1835807200, "objective/train/value_avg": -0.00986480712890625, "objective/train/value_loss": 0.0003300151147413999, "objective/train/value_max": -3.0934810638427734e-05, "objective/train/value_min": -0.7333984375, "objective/train/value_reward_corr": 0.7485666886816452, "objective/train/value_std": 0.021759033203125, "objective/train/weight_avg": 1.0019299983978271, "objective/train/weighted_lm_loss": 1.3307819366455078, "objective/train/weights_max": 1.4997124671936035, "objective/train/weights_min": 0.38332873582839966, "theoretical_loss": 3.4869445623139272, "tokens_seen": 1815347200 }, { "epoch": 0.55, "learning_rate": 0.00045442144118119086, "loss": 0.0685, "theoretical_loss": 3.4869445623139272, "tokens_seen": 1815347200 }, { "epoch": 0.55, "learning_rate": 0.0004543411972396084, "loss": 0.0717, "theoretical_loss": 3.4869021283008257, "tokens_seen": 1815609344 }, { "epoch": 0.55, "learning_rate": 0.000454260953298026, "loss": 0.067, "theoretical_loss": 3.4868597021292658, "tokens_seen": 1815871488 }, { "epoch": 0.55, "learning_rate": 0.00045418070935644364, "loss": 0.0711, "theoretical_loss": 3.4868172837966673, "tokens_seen": 1816133632 }, { "epoch": 0.55, "learning_rate": 0.00045410046541486114, "loss": 0.0664, "theoretical_loss": 3.48677487330045, "tokens_seen": 1816395776 }, { "epoch": 0.55, "learning_rate": 0.00045402022147327876, "loss": 0.0683, "theoretical_loss": 3.486732470638036, "tokens_seen": 1816657920 }, { "epoch": 0.55, "learning_rate": 0.00045393997753169637, "loss": 0.0686, "theoretical_loss": 3.4866900758068478, "tokens_seen": 1816920064 }, { "epoch": 0.55, "learning_rate": 0.000453859733590114, "loss": 0.0673, "theoretical_loss": 3.4866476888043096, "tokens_seen": 1817182208 }, { "epoch": 0.55, "learning_rate": 0.00045377948964853154, "loss": 0.0682, "theoretical_loss": 3.486605309627847, "tokens_seen": 1817444352 }, { "epoch": 0.55, "learning_rate": 0.00045369924570694915, "loss": 0.0693, "theoretical_loss": 3.4865629382748864, "tokens_seen": 1817706496 }, { "epoch": 0.55, "learning_rate": 0.00045361900176536676, "loss": 0.07, "theoretical_loss": 3.486520574742855, "tokens_seen": 1817968640 }, { "epoch": 0.55, "learning_rate": 0.00045353875782378427, "loss": 0.0667, "theoretical_loss": 3.486478219029183, "tokens_seen": 1818230784 }, { "epoch": 0.55, "learning_rate": 0.0004534585138822019, "loss": 0.0698, "theoretical_loss": 3.4864358711312997, "tokens_seen": 1818492928 }, { "epoch": 0.55, "learning_rate": 0.0004533782699406195, "loss": 0.068, "theoretical_loss": 3.4863935310466365, "tokens_seen": 1818755072 }, { "epoch": 0.55, "learning_rate": 0.00045329802599903705, "loss": 0.0688, "theoretical_loss": 3.486351198772626, "tokens_seen": 1819017216 }, { "epoch": 0.55, "learning_rate": 0.00045321778205745467, "loss": 0.068, "theoretical_loss": 3.4863088743067023, "tokens_seen": 1819279360 }, { "epoch": 0.55, "learning_rate": 0.0004531375381158723, "loss": 0.0707, "theoretical_loss": 3.4862665576463003, "tokens_seen": 1819541504 }, { "epoch": 0.55, "learning_rate": 0.0004530572941742899, "loss": 0.0678, "theoretical_loss": 3.4862242487888566, "tokens_seen": 1819803648 }, { "epoch": 0.55, "learning_rate": 0.00045297705023270745, "loss": 0.0679, "theoretical_loss": 3.486181947731808, "tokens_seen": 1820065792 }, { "epoch": 0.55, "learning_rate": 0.000452896806291125, "loss": 0.0687, "theoretical_loss": 3.486139654472594, "tokens_seen": 1820327936 }, { "epoch": 0.55, "learning_rate": 0.0004528165623495426, "loss": 0.0681, "theoretical_loss": 3.486097369008654, "tokens_seen": 1820590080 }, { "epoch": 0.55, "learning_rate": 0.0004527363184079602, "loss": 0.0704, "theoretical_loss": 3.4860550913374286, "tokens_seen": 1820852224 }, { "epoch": 0.55, "learning_rate": 0.0004526560744663778, "loss": 0.0709, "theoretical_loss": 3.4860128214563613, "tokens_seen": 1821114368 }, { "epoch": 0.55, "learning_rate": 0.0004525758305247954, "loss": 0.0709, "theoretical_loss": 3.4859705593628947, "tokens_seen": 1821376512 }, { "epoch": 0.55, "learning_rate": 0.000452495586583213, "loss": 0.0679, "theoretical_loss": 3.4859283050544736, "tokens_seen": 1821638656 }, { "epoch": 0.55, "objective/train/advantage_avg": 2.5156406991300173e-05, "objective/train/docs_used": 664649, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.30966317653656, "objective/train/original_loss": 1.3096632957458496, "objective/train/theoretical_loss": 3.4858860585285445, "objective/train/tokens_used": 1842360800, "objective/train/value_avg": -0.008636474609375, "objective/train/value_loss": 0.00020235766714904457, "objective/train/value_max": -5.269050598144531e-05, "objective/train/value_min": -0.487548828125, "objective/train/value_reward_corr": 0.7453598247617829, "objective/train/value_std": 0.0155029296875, "objective/train/weight_avg": 1.0001190900802612, "objective/train/weighted_lm_loss": 1.3092310428619385, "objective/train/weights_max": 1.396862268447876, "objective/train/weights_min": 0.37186679244041443, "theoretical_loss": 3.4858860585285445, "tokens_seen": 1821900800 }, { "epoch": 0.55, "learning_rate": 0.0004524153426416306, "loss": 0.0679, "theoretical_loss": 3.4858860585285445, "tokens_seen": 1821900800 }, { "epoch": 0.55, "learning_rate": 0.00045233509870004813, "loss": 0.0697, "theoretical_loss": 3.485843819782554, "tokens_seen": 1822162944 }, { "epoch": 0.55, "learning_rate": 0.00045225485475846575, "loss": 0.0718, "theoretical_loss": 3.48580158881395, "tokens_seen": 1822425088 }, { "epoch": 0.55, "learning_rate": 0.0004521746108168833, "loss": 0.0691, "theoretical_loss": 3.4857593656201833, "tokens_seen": 1822687232 }, { "epoch": 0.55, "learning_rate": 0.0004520943668753009, "loss": 0.0702, "theoretical_loss": 3.4857171501987034, "tokens_seen": 1822949376 }, { "epoch": 0.55, "learning_rate": 0.00045201412293371853, "loss": 0.067, "theoretical_loss": 3.4856749425469635, "tokens_seen": 1823211520 }, { "epoch": 0.55, "learning_rate": 0.00045193387899213614, "loss": 0.0737, "theoretical_loss": 3.485632742662416, "tokens_seen": 1823473664 }, { "epoch": 0.55, "learning_rate": 0.0004518536350505537, "loss": 0.0698, "theoretical_loss": 3.4855905505425144, "tokens_seen": 1823735808 }, { "epoch": 0.55, "learning_rate": 0.00045177339110897126, "loss": 0.069, "theoretical_loss": 3.485548366184716, "tokens_seen": 1823997952 }, { "epoch": 0.55, "learning_rate": 0.00045169314716738887, "loss": 0.0711, "theoretical_loss": 3.4855061895864763, "tokens_seen": 1824260096 }, { "epoch": 0.55, "learning_rate": 0.00045161290322580643, "loss": 0.0691, "theoretical_loss": 3.485464020745253, "tokens_seen": 1824522240 }, { "epoch": 0.55, "learning_rate": 0.00045153265928422404, "loss": 0.0668, "theoretical_loss": 3.4854218596585067, "tokens_seen": 1824784384 }, { "epoch": 0.55, "learning_rate": 0.00045145241534264165, "loss": 0.0675, "theoretical_loss": 3.485379706323697, "tokens_seen": 1825046528 }, { "epoch": 0.55, "learning_rate": 0.0004513721714010592, "loss": 0.0691, "theoretical_loss": 3.4853375607382846, "tokens_seen": 1825308672 }, { "epoch": 0.55, "learning_rate": 0.0004512919274594768, "loss": 0.0689, "theoretical_loss": 3.4852954228997337, "tokens_seen": 1825570816 }, { "epoch": 0.55, "learning_rate": 0.00045121168351789444, "loss": 0.069, "theoretical_loss": 3.485253292805507, "tokens_seen": 1825832960 }, { "epoch": 0.55, "learning_rate": 0.000451131439576312, "loss": 0.0703, "theoretical_loss": 3.48521117045307, "tokens_seen": 1826095104 }, { "epoch": 0.55, "learning_rate": 0.00045105119563472956, "loss": 0.0706, "theoretical_loss": 3.4851690558398896, "tokens_seen": 1826357248 }, { "epoch": 0.55, "learning_rate": 0.00045097095169314717, "loss": 0.0669, "theoretical_loss": 3.4851269489634324, "tokens_seen": 1826619392 }, { "epoch": 0.55, "learning_rate": 0.0004508907077515648, "loss": 0.0662, "theoretical_loss": 3.4850848498211677, "tokens_seen": 1826881536 }, { "epoch": 0.55, "learning_rate": 0.00045081046380998234, "loss": 0.0668, "theoretical_loss": 3.4850427584105654, "tokens_seen": 1827143680 }, { "epoch": 0.55, "learning_rate": 0.00045073021986839995, "loss": 0.0679, "theoretical_loss": 3.4850006747290965, "tokens_seen": 1827405824 }, { "epoch": 0.55, "learning_rate": 0.00045064997592681756, "loss": 0.0708, "theoretical_loss": 3.4849585987742326, "tokens_seen": 1827667968 }, { "epoch": 0.55, "learning_rate": 0.0004505697319852351, "loss": 0.0712, "theoretical_loss": 3.4849165305434484, "tokens_seen": 1827930112 }, { "epoch": 0.55, "learning_rate": 0.0004504894880436527, "loss": 0.0668, "theoretical_loss": 3.4848744700342174, "tokens_seen": 1828192256 }, { "epoch": 0.55, "objective/train/advantage_avg": -0.00015672965673729777, "objective/train/docs_used": 666768, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3809049129486084, "objective/train/original_loss": 1.3809049129486084, "objective/train/theoretical_loss": 3.484832417244016, "objective/train/tokens_used": 1848914400, "objective/train/value_avg": -0.0067596435546875, "objective/train/value_loss": 0.0001564290578244254, "objective/train/value_max": -6.502866744995117e-05, "objective/train/value_min": -0.6142578125, "objective/train/value_reward_corr": 0.7000283534341674, "objective/train/value_std": 0.012420654296875, "objective/train/weight_avg": 0.9999170303344727, "objective/train/weighted_lm_loss": 1.3802940845489502, "objective/train/weights_max": 1.7114393711090088, "objective/train/weights_min": 0.36869922280311584, "theoretical_loss": 3.484832417244016, "tokens_seen": 1828454400 }, { "epoch": 0.55, "learning_rate": 0.0004504092441020703, "loss": 0.0698, "theoretical_loss": 3.484832417244016, "tokens_seen": 1828454400 }, { "epoch": 0.55, "learning_rate": 0.0004503290001604879, "loss": 0.0681, "theoretical_loss": 3.484790372170321, "tokens_seen": 1828716544 }, { "epoch": 0.55, "learning_rate": 0.00045024875621890546, "loss": 0.067, "theoretical_loss": 3.484748334810611, "tokens_seen": 1828978688 }, { "epoch": 0.55, "learning_rate": 0.0004501685122773231, "loss": 0.0678, "theoretical_loss": 3.484706305162365, "tokens_seen": 1829240832 }, { "epoch": 0.55, "learning_rate": 0.0004500882683357407, "loss": 0.0672, "theoretical_loss": 3.484664283223064, "tokens_seen": 1829502976 }, { "epoch": 0.55, "learning_rate": 0.0004500080243941583, "loss": 0.0691, "theoretical_loss": 3.484622268990189, "tokens_seen": 1829765120 }, { "epoch": 0.55, "learning_rate": 0.0004499277804525758, "loss": 0.0691, "theoretical_loss": 3.4845802624612237, "tokens_seen": 1830027264 }, { "epoch": 0.55, "learning_rate": 0.0004498475365109934, "loss": 0.0677, "theoretical_loss": 3.484538263633652, "tokens_seen": 1830289408 }, { "epoch": 0.55, "learning_rate": 0.00044976729256941103, "loss": 0.0694, "theoretical_loss": 3.484496272504959, "tokens_seen": 1830551552 }, { "epoch": 0.55, "learning_rate": 0.0004496870486278286, "loss": 0.0676, "theoretical_loss": 3.484454289072631, "tokens_seen": 1830813696 }, { "epoch": 0.55, "learning_rate": 0.0004496068046862462, "loss": 0.0693, "theoretical_loss": 3.4844123133341567, "tokens_seen": 1831075840 }, { "epoch": 0.55, "learning_rate": 0.0004495265607446638, "loss": 0.0672, "theoretical_loss": 3.484370345287024, "tokens_seen": 1831337984 }, { "epoch": 0.56, "learning_rate": 0.00044944631680308143, "loss": 0.0665, "theoretical_loss": 3.484328384928723, "tokens_seen": 1831600128 }, { "epoch": 0.56, "learning_rate": 0.00044936607286149893, "loss": 0.0675, "theoretical_loss": 3.484286432256745, "tokens_seen": 1831862272 }, { "epoch": 0.56, "learning_rate": 0.00044928582891991654, "loss": 0.0686, "theoretical_loss": 3.484244487268583, "tokens_seen": 1832124416 }, { "epoch": 0.56, "learning_rate": 0.00044920558497833416, "loss": 0.0688, "theoretical_loss": 3.4842025499617297, "tokens_seen": 1832386560 }, { "epoch": 0.56, "learning_rate": 0.0004491253410367517, "loss": 0.0717, "theoretical_loss": 3.4841606203336806, "tokens_seen": 1832648704 }, { "epoch": 0.56, "learning_rate": 0.00044904509709516933, "loss": 0.0664, "theoretical_loss": 3.4841186983819306, "tokens_seen": 1832910848 }, { "epoch": 0.56, "learning_rate": 0.00044896485315358694, "loss": 0.0681, "theoretical_loss": 3.4840767841039777, "tokens_seen": 1833172992 }, { "epoch": 0.56, "learning_rate": 0.0004488846092120045, "loss": 0.0697, "theoretical_loss": 3.48403487749732, "tokens_seen": 1833435136 }, { "epoch": 0.56, "learning_rate": 0.00044880436527042206, "loss": 0.0707, "theoretical_loss": 3.4839929785594563, "tokens_seen": 1833697280 }, { "epoch": 0.56, "learning_rate": 0.00044872412132883967, "loss": 0.0679, "theoretical_loss": 3.4839510872878883, "tokens_seen": 1833959424 }, { "epoch": 0.56, "learning_rate": 0.0004486438773872573, "loss": 0.0674, "theoretical_loss": 3.483909203680117, "tokens_seen": 1834221568 }, { "epoch": 0.56, "learning_rate": 0.00044856363344567484, "loss": 0.0695, "theoretical_loss": 3.483867327733645, "tokens_seen": 1834483712 }, { "epoch": 0.56, "learning_rate": 0.00044848338950409245, "loss": 0.0676, "theoretical_loss": 3.4838254594459777, "tokens_seen": 1834745856 }, { "epoch": 0.56, "objective/train/advantage_avg": -0.00017360829224344343, "objective/train/docs_used": 669245, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2899640798568726, "objective/train/original_loss": 1.289963960647583, "objective/train/theoretical_loss": 3.483783598814619, "objective/train/tokens_used": 1855468000, "objective/train/value_avg": -0.0079498291015625, "objective/train/value_loss": 0.0003152432036586106, "objective/train/value_max": -5.561113357543945e-05, "objective/train/value_min": -0.9013671875, "objective/train/value_reward_corr": 0.652301360066071, "objective/train/value_std": 0.01739501953125, "objective/train/weight_avg": 0.9999733567237854, "objective/train/weighted_lm_loss": 1.290113091468811, "objective/train/weights_max": 2.32053279876709, "objective/train/weights_min": 0.37400925159454346, "theoretical_loss": 3.483783598814619, "tokens_seen": 1835008000 }, { "epoch": 0.56, "learning_rate": 0.00044840314556251007, "loss": 0.0685, "theoretical_loss": 3.483783598814619, "tokens_seen": 1835008000 }, { "epoch": 0.56, "learning_rate": 0.0004483229016209276, "loss": 0.069, "theoretical_loss": 3.4837417458370767, "tokens_seen": 1835270144 }, { "epoch": 0.56, "learning_rate": 0.00044824265767934524, "loss": 0.0672, "theoretical_loss": 3.483699900510857, "tokens_seen": 1835532288 }, { "epoch": 0.56, "learning_rate": 0.0004481624137377628, "loss": 0.069, "theoretical_loss": 3.4836580628334697, "tokens_seen": 1835794432 }, { "epoch": 0.56, "learning_rate": 0.0004480821697961804, "loss": 0.0719, "theoretical_loss": 3.4836162328024245, "tokens_seen": 1836056576 }, { "epoch": 0.56, "learning_rate": 0.00044800192585459797, "loss": 0.0683, "theoretical_loss": 3.4835744104152324, "tokens_seen": 1836318720 }, { "epoch": 0.56, "learning_rate": 0.0004479216819130156, "loss": 0.0698, "theoretical_loss": 3.483532595669406, "tokens_seen": 1836580864 }, { "epoch": 0.56, "learning_rate": 0.0004478414379714332, "loss": 0.0654, "theoretical_loss": 3.4834907885624586, "tokens_seen": 1836843008 }, { "epoch": 0.56, "learning_rate": 0.00044776119402985075, "loss": 0.0658, "theoretical_loss": 3.4834489890919045, "tokens_seen": 1837105152 }, { "epoch": 0.56, "learning_rate": 0.00044768095008826836, "loss": 0.0674, "theoretical_loss": 3.48340719725526, "tokens_seen": 1837367296 }, { "epoch": 0.56, "learning_rate": 0.0004476007061466859, "loss": 0.0697, "theoretical_loss": 3.4833654130500413, "tokens_seen": 1837629440 }, { "epoch": 0.56, "learning_rate": 0.0004475204622051035, "loss": 0.0665, "theoretical_loss": 3.4833236364737674, "tokens_seen": 1837891584 }, { "epoch": 0.56, "learning_rate": 0.0004474402182635211, "loss": 0.0677, "theoretical_loss": 3.4832818675239574, "tokens_seen": 1838153728 }, { "epoch": 0.56, "learning_rate": 0.0004473599743219387, "loss": 0.0687, "theoretical_loss": 3.483240106198131, "tokens_seen": 1838415872 }, { "epoch": 0.56, "learning_rate": 0.0004472797303803563, "loss": 0.0709, "theoretical_loss": 3.483198352493811, "tokens_seen": 1838678016 }, { "epoch": 0.56, "learning_rate": 0.0004471994864387739, "loss": 0.0719, "theoretical_loss": 3.4831566064085187, "tokens_seen": 1838940160 }, { "epoch": 0.56, "learning_rate": 0.0004471192424971915, "loss": 0.068, "theoretical_loss": 3.483114867939779, "tokens_seen": 1839202304 }, { "epoch": 0.56, "learning_rate": 0.00044703899855560905, "loss": 0.0676, "theoretical_loss": 3.4830731370851167, "tokens_seen": 1839464448 }, { "epoch": 0.56, "learning_rate": 0.0004469587546140266, "loss": 0.0674, "theoretical_loss": 3.483031413842058, "tokens_seen": 1839726592 }, { "epoch": 0.56, "learning_rate": 0.0004468785106724442, "loss": 0.07, "theoretical_loss": 3.4829896982081303, "tokens_seen": 1839988736 }, { "epoch": 0.56, "learning_rate": 0.00044679826673086183, "loss": 0.0701, "theoretical_loss": 3.4829479901808624, "tokens_seen": 1840250880 }, { "epoch": 0.56, "learning_rate": 0.00044671802278927944, "loss": 0.0668, "theoretical_loss": 3.482906289757784, "tokens_seen": 1840513024 }, { "epoch": 0.56, "learning_rate": 0.000446637778847697, "loss": 0.0699, "theoretical_loss": 3.482864596936425, "tokens_seen": 1840775168 }, { "epoch": 0.56, "learning_rate": 0.0004465575349061146, "loss": 0.0684, "theoretical_loss": 3.482822911714318, "tokens_seen": 1841037312 }, { "epoch": 0.56, "learning_rate": 0.00044647729096453223, "loss": 0.0688, "theoretical_loss": 3.4827812340889963, "tokens_seen": 1841299456 }, { "epoch": 0.56, "objective/train/advantage_avg": -8.307833195431158e-05, "objective/train/docs_used": 671743, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3671631813049316, "objective/train/original_loss": 1.367163062095642, "objective/train/theoretical_loss": 3.4827395640579946, "objective/train/tokens_used": 1862021600, "objective/train/value_avg": -0.0087738037109375, "objective/train/value_loss": 0.0003252418537158519, "objective/train/value_max": -2.110004425048828e-05, "objective/train/value_min": -0.84423828125, "objective/train/value_reward_corr": 0.8539613338364523, "objective/train/value_std": 0.0264739990234375, "objective/train/weight_avg": 1.0000675916671753, "objective/train/weighted_lm_loss": 1.3670684099197388, "objective/train/weights_max": 1.6681559085845947, "objective/train/weights_min": 0.3783082664012909, "theoretical_loss": 3.4827395640579946, "tokens_seen": 1841561600 }, { "epoch": 0.56, "learning_rate": 0.00044639704702294973, "loss": 0.069, "theoretical_loss": 3.4827395640579946, "tokens_seen": 1841561600 }, { "epoch": 0.56, "learning_rate": 0.00044631680308136734, "loss": 0.0656, "theoretical_loss": 3.4826979016188475, "tokens_seen": 1841823744 }, { "epoch": 0.56, "learning_rate": 0.00044623655913978496, "loss": 0.069, "theoretical_loss": 3.482656246769092, "tokens_seen": 1842085888 }, { "epoch": 0.56, "learning_rate": 0.00044615631519820257, "loss": 0.0682, "theoretical_loss": 3.482614599506266, "tokens_seen": 1842348032 }, { "epoch": 0.56, "learning_rate": 0.00044607607125662013, "loss": 0.0695, "theoretical_loss": 3.482572959827908, "tokens_seen": 1842610176 }, { "epoch": 0.56, "learning_rate": 0.00044599582731503774, "loss": 0.07, "theoretical_loss": 3.482531327731558, "tokens_seen": 1842872320 }, { "epoch": 0.56, "learning_rate": 0.00044591558337345535, "loss": 0.0688, "theoretical_loss": 3.4824897032147577, "tokens_seen": 1843134464 }, { "epoch": 0.56, "learning_rate": 0.00044583533943187286, "loss": 0.0674, "theoretical_loss": 3.482448086275049, "tokens_seen": 1843396608 }, { "epoch": 0.56, "learning_rate": 0.00044575509549029047, "loss": 0.0658, "theoretical_loss": 3.4824064769099756, "tokens_seen": 1843658752 }, { "epoch": 0.56, "learning_rate": 0.0004456748515487081, "loss": 0.0666, "theoretical_loss": 3.4823648751170824, "tokens_seen": 1843920896 }, { "epoch": 0.56, "learning_rate": 0.0004455946076071257, "loss": 0.0667, "theoretical_loss": 3.482323280893915, "tokens_seen": 1844183040 }, { "epoch": 0.56, "learning_rate": 0.00044551436366554325, "loss": 0.0669, "theoretical_loss": 3.4822816942380195, "tokens_seen": 1844445184 }, { "epoch": 0.56, "learning_rate": 0.00044543411972396087, "loss": 0.0671, "theoretical_loss": 3.4822401151469453, "tokens_seen": 1844707328 }, { "epoch": 0.56, "learning_rate": 0.0004453538757823785, "loss": 0.0703, "theoretical_loss": 3.4821985436182405, "tokens_seen": 1844969472 }, { "epoch": 0.56, "learning_rate": 0.00044527363184079604, "loss": 0.0655, "theoretical_loss": 3.4821569796494565, "tokens_seen": 1845231616 }, { "epoch": 0.56, "learning_rate": 0.0004451933878992136, "loss": 0.0659, "theoretical_loss": 3.482115423238144, "tokens_seen": 1845493760 }, { "epoch": 0.56, "learning_rate": 0.0004451131439576312, "loss": 0.0711, "theoretical_loss": 3.4820738743818556, "tokens_seen": 1845755904 }, { "epoch": 0.56, "learning_rate": 0.00044503290001604877, "loss": 0.0696, "theoretical_loss": 3.4820323330781457, "tokens_seen": 1846018048 }, { "epoch": 0.56, "learning_rate": 0.0004449526560744664, "loss": 0.0677, "theoretical_loss": 3.481990799324568, "tokens_seen": 1846280192 }, { "epoch": 0.56, "learning_rate": 0.000444872412132884, "loss": 0.0686, "theoretical_loss": 3.4819492731186807, "tokens_seen": 1846542336 }, { "epoch": 0.56, "learning_rate": 0.0004447921681913016, "loss": 0.0667, "theoretical_loss": 3.481907754458039, "tokens_seen": 1846804480 }, { "epoch": 0.56, "learning_rate": 0.00044471192424971916, "loss": 0.0685, "theoretical_loss": 3.4818662433402014, "tokens_seen": 1847066624 }, { "epoch": 0.56, "learning_rate": 0.0004446316803081367, "loss": 0.0686, "theoretical_loss": 3.4818247397627284, "tokens_seen": 1847328768 }, { "epoch": 0.56, "learning_rate": 0.00044455143636655433, "loss": 0.0669, "theoretical_loss": 3.48178324372318, "tokens_seen": 1847590912 }, { "epoch": 0.56, "learning_rate": 0.0004444711924249719, "loss": 0.0649, "theoretical_loss": 3.481741755219118, "tokens_seen": 1847853056 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.00012247166887391359, "objective/train/docs_used": 673981, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3901854753494263, "objective/train/original_loss": 1.3901854753494263, "objective/train/theoretical_loss": 3.481700274248105, "objective/train/tokens_used": 1868575200, "objective/train/value_avg": -0.007476806640625, "objective/train/value_loss": 0.00014100054977461696, "objective/train/value_max": -5.346536636352539e-05, "objective/train/value_min": -0.218505859375, "objective/train/value_reward_corr": 0.7216289321788012, "objective/train/value_std": 0.01219940185546875, "objective/train/weight_avg": 1.0001883506774902, "objective/train/weighted_lm_loss": 1.3908172845840454, "objective/train/weights_max": 1.1437007188796997, "objective/train/weights_min": 0.37020933628082275, "theoretical_loss": 3.481700274248105, "tokens_seen": 1848115200 }, { "epoch": 0.56, "learning_rate": 0.0004443909484833895, "loss": 0.0708, "theoretical_loss": 3.481700274248105, "tokens_seen": 1848115200 }, { "epoch": 0.56, "learning_rate": 0.0004443107045418071, "loss": 0.0708, "theoretical_loss": 3.481658800807706, "tokens_seen": 1848377344 }, { "epoch": 0.56, "learning_rate": 0.00044423046060022473, "loss": 0.0704, "theoretical_loss": 3.4816173348954846, "tokens_seen": 1848639488 }, { "epoch": 0.56, "learning_rate": 0.0004441502166586423, "loss": 0.0696, "theoretical_loss": 3.481575876509008, "tokens_seen": 1848901632 }, { "epoch": 0.56, "learning_rate": 0.00044406997271705985, "loss": 0.0726, "theoretical_loss": 3.4815344256458434, "tokens_seen": 1849163776 }, { "epoch": 0.56, "learning_rate": 0.00044398972877547746, "loss": 0.0702, "theoretical_loss": 3.4814929823035596, "tokens_seen": 1849425920 }, { "epoch": 0.56, "learning_rate": 0.000443909484833895, "loss": 0.0708, "theoretical_loss": 3.481451546479726, "tokens_seen": 1849688064 }, { "epoch": 0.56, "learning_rate": 0.00044382924089231263, "loss": 0.0685, "theoretical_loss": 3.4814101181719135, "tokens_seen": 1849950208 }, { "epoch": 0.56, "learning_rate": 0.00044374899695073024, "loss": 0.0693, "theoretical_loss": 3.4813686973776936, "tokens_seen": 1850212352 }, { "epoch": 0.56, "learning_rate": 0.00044366875300914786, "loss": 0.067, "theoretical_loss": 3.48132728409464, "tokens_seen": 1850474496 }, { "epoch": 0.56, "learning_rate": 0.0004435885090675654, "loss": 0.0712, "theoretical_loss": 3.4812858783203264, "tokens_seen": 1850736640 }, { "epoch": 0.56, "learning_rate": 0.000443508265125983, "loss": 0.0718, "theoretical_loss": 3.481244480052329, "tokens_seen": 1850998784 }, { "epoch": 0.56, "learning_rate": 0.0004434280211844006, "loss": 0.0674, "theoretical_loss": 3.4812030892882224, "tokens_seen": 1851260928 }, { "epoch": 0.56, "learning_rate": 0.00044334777724281814, "loss": 0.0671, "theoretical_loss": 3.4811617060255857, "tokens_seen": 1851523072 }, { "epoch": 0.56, "learning_rate": 0.00044326753330123576, "loss": 0.0692, "theoretical_loss": 3.481120330261997, "tokens_seen": 1851785216 }, { "epoch": 0.56, "learning_rate": 0.00044318728935965337, "loss": 0.0677, "theoretical_loss": 3.4810789619950366, "tokens_seen": 1852047360 }, { "epoch": 0.56, "learning_rate": 0.0004431070454180709, "loss": 0.0679, "theoretical_loss": 3.481037601222285, "tokens_seen": 1852309504 }, { "epoch": 0.56, "learning_rate": 0.00044302680147648854, "loss": 0.069, "theoretical_loss": 3.480996247941324, "tokens_seen": 1852571648 }, { "epoch": 0.56, "learning_rate": 0.00044294655753490615, "loss": 0.0682, "theoretical_loss": 3.4809549021497372, "tokens_seen": 1852833792 }, { "epoch": 0.56, "learning_rate": 0.0004428663135933237, "loss": 0.0691, "theoretical_loss": 3.480913563845109, "tokens_seen": 1853095936 }, { "epoch": 0.56, "learning_rate": 0.00044278606965174127, "loss": 0.066, "theoretical_loss": 3.480872233025024, "tokens_seen": 1853358080 }, { "epoch": 0.56, "learning_rate": 0.0004427058257101589, "loss": 0.0694, "theoretical_loss": 3.4808309096870698, "tokens_seen": 1853620224 }, { "epoch": 0.56, "learning_rate": 0.0004426255817685765, "loss": 0.0667, "theoretical_loss": 3.480789593828834, "tokens_seen": 1853882368 }, { "epoch": 0.56, "learning_rate": 0.00044254533782699405, "loss": 0.0695, "theoretical_loss": 3.4807482854479037, "tokens_seen": 1854144512 }, { "epoch": 0.56, "learning_rate": 0.00044246509388541167, "loss": 0.0692, "theoretical_loss": 3.4807069845418708, "tokens_seen": 1854406656 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.0009944553021341562, "objective/train/docs_used": 676483, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5289674997329712, "objective/train/original_loss": 1.5289674997329712, "objective/train/theoretical_loss": 3.4806656911083254, "objective/train/tokens_used": 1875128800, "objective/train/value_avg": -0.00623321533203125, "objective/train/value_loss": 0.00023596876417286694, "objective/train/value_max": -6.35385513305664e-05, "objective/train/value_min": -0.64013671875, "objective/train/value_reward_corr": 0.6387599212671781, "objective/train/value_std": 0.01309967041015625, "objective/train/weight_avg": 1.0010981559753418, "objective/train/weighted_lm_loss": 1.5305681228637695, "objective/train/weights_max": 1.3230394124984741, "objective/train/weights_min": 0.37203988432884216, "theoretical_loss": 3.4806656911083254, "tokens_seen": 1854668800 }, { "epoch": 0.56, "learning_rate": 0.0004423848499438293, "loss": 0.0721, "theoretical_loss": 3.4806656911083254, "tokens_seen": 1854668800 }, { "epoch": 0.56, "learning_rate": 0.00044230460600224684, "loss": 0.069, "theoretical_loss": 3.48062440514486, "tokens_seen": 1854930944 }, { "epoch": 0.56, "learning_rate": 0.0004422243620606644, "loss": 0.0693, "theoretical_loss": 3.4805831266490674, "tokens_seen": 1855193088 }, { "epoch": 0.56, "learning_rate": 0.000442144118119082, "loss": 0.067, "theoretical_loss": 3.480541855618542, "tokens_seen": 1855455232 }, { "epoch": 0.56, "learning_rate": 0.0004420638741774996, "loss": 0.0668, "theoretical_loss": 3.4805005920508796, "tokens_seen": 1855717376 }, { "epoch": 0.56, "learning_rate": 0.0004419836302359172, "loss": 0.0689, "theoretical_loss": 3.480459335943676, "tokens_seen": 1855979520 }, { "epoch": 0.56, "learning_rate": 0.0004419033862943348, "loss": 0.0684, "theoretical_loss": 3.4804180872945305, "tokens_seen": 1856241664 }, { "epoch": 0.56, "learning_rate": 0.0004418231423527524, "loss": 0.0653, "theoretical_loss": 3.4803768461010405, "tokens_seen": 1856503808 }, { "epoch": 0.56, "learning_rate": 0.00044174289841117, "loss": 0.0707, "theoretical_loss": 3.4803356123608062, "tokens_seen": 1856765952 }, { "epoch": 0.56, "learning_rate": 0.0004416626544695875, "loss": 0.0673, "theoretical_loss": 3.480294386071429, "tokens_seen": 1857028096 }, { "epoch": 0.56, "learning_rate": 0.00044158241052800513, "loss": 0.068, "theoretical_loss": 3.4802531672305106, "tokens_seen": 1857290240 }, { "epoch": 0.56, "learning_rate": 0.00044150216658642275, "loss": 0.0702, "theoretical_loss": 3.480211955835654, "tokens_seen": 1857552384 }, { "epoch": 0.56, "learning_rate": 0.0004414219226448403, "loss": 0.0687, "theoretical_loss": 3.4801707518844647, "tokens_seen": 1857814528 }, { "epoch": 0.56, "learning_rate": 0.0004413416787032579, "loss": 0.0655, "theoretical_loss": 3.480129555374547, "tokens_seen": 1858076672 }, { "epoch": 0.56, "learning_rate": 0.00044126143476167553, "loss": 0.0692, "theoretical_loss": 3.4800883663035083, "tokens_seen": 1858338816 }, { "epoch": 0.56, "learning_rate": 0.0004411811908200931, "loss": 0.0654, "theoretical_loss": 3.4800471846689556, "tokens_seen": 1858600960 }, { "epoch": 0.56, "learning_rate": 0.00044110094687851065, "loss": 0.0707, "theoretical_loss": 3.4800060104684984, "tokens_seen": 1858863104 }, { "epoch": 0.56, "learning_rate": 0.00044102070293692826, "loss": 0.0642, "theoretical_loss": 3.4799648436997463, "tokens_seen": 1859125248 }, { "epoch": 0.56, "learning_rate": 0.00044094045899534587, "loss": 0.0671, "theoretical_loss": 3.47992368436031, "tokens_seen": 1859387392 }, { "epoch": 0.56, "learning_rate": 0.00044086021505376343, "loss": 0.0661, "theoretical_loss": 3.4798825324478018, "tokens_seen": 1859649536 }, { "epoch": 0.56, "learning_rate": 0.00044077997111218104, "loss": 0.07, "theoretical_loss": 3.4798413879598353, "tokens_seen": 1859911680 }, { "epoch": 0.56, "learning_rate": 0.00044069972717059865, "loss": 0.0657, "theoretical_loss": 3.4798002508940242, "tokens_seen": 1860173824 }, { "epoch": 0.56, "learning_rate": 0.0004406194832290162, "loss": 0.0696, "theoretical_loss": 3.479759121247984, "tokens_seen": 1860435968 }, { "epoch": 0.56, "learning_rate": 0.0004405392392874338, "loss": 0.0692, "theoretical_loss": 3.479717999019332, "tokens_seen": 1860698112 }, { "epoch": 0.56, "learning_rate": 0.0004404589953458514, "loss": 0.067, "theoretical_loss": 3.4796768842056847, "tokens_seen": 1860960256 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.00022138823987916112, "objective/train/docs_used": 678968, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3515822887420654, "objective/train/original_loss": 1.3515822887420654, "objective/train/theoretical_loss": 3.4796357768046615, "objective/train/tokens_used": 1881682400, "objective/train/value_avg": -0.008880615234375, "objective/train/value_loss": 0.0006506952340714633, "objective/train/value_max": -6.604194641113281e-05, "objective/train/value_min": -0.92333984375, "objective/train/value_reward_corr": 0.6781693074134842, "objective/train/value_std": 0.0211181640625, "objective/train/weight_avg": 1.0004839897155762, "objective/train/weighted_lm_loss": 1.3513437509536743, "objective/train/weights_max": 2.46537446975708, "objective/train/weights_min": 0.0841136947274208, "theoretical_loss": 3.4796357768046615, "tokens_seen": 1861222400 }, { "epoch": 0.56, "learning_rate": 0.000440378751404269, "loss": 0.07, "theoretical_loss": 3.4796357768046615, "tokens_seen": 1861222400 }, { "epoch": 0.56, "learning_rate": 0.00044029850746268656, "loss": 0.0674, "theoretical_loss": 3.4795946768138823, "tokens_seen": 1861484544 }, { "epoch": 0.56, "learning_rate": 0.00044021826352110417, "loss": 0.0688, "theoretical_loss": 3.4795535842309677, "tokens_seen": 1861746688 }, { "epoch": 0.56, "learning_rate": 0.0004401380195795218, "loss": 0.0714, "theoretical_loss": 3.4795124990535395, "tokens_seen": 1862008832 }, { "epoch": 0.56, "learning_rate": 0.00044005777563793934, "loss": 0.0658, "theoretical_loss": 3.479471421279222, "tokens_seen": 1862270976 }, { "epoch": 0.56, "learning_rate": 0.00043997753169635695, "loss": 0.0695, "theoretical_loss": 3.4794303509056377, "tokens_seen": 1862533120 }, { "epoch": 0.56, "learning_rate": 0.0004398972877547745, "loss": 0.0679, "theoretical_loss": 3.479389287930413, "tokens_seen": 1862795264 }, { "epoch": 0.56, "learning_rate": 0.0004398170438131921, "loss": 0.0683, "theoretical_loss": 3.4793482323511746, "tokens_seen": 1863057408 }, { "epoch": 0.56, "learning_rate": 0.0004397367998716097, "loss": 0.071, "theoretical_loss": 3.479307184165549, "tokens_seen": 1863319552 }, { "epoch": 0.56, "learning_rate": 0.0004396565559300273, "loss": 0.0677, "theoretical_loss": 3.4792661433711656, "tokens_seen": 1863581696 }, { "epoch": 0.56, "learning_rate": 0.0004395763119884449, "loss": 0.0688, "theoretical_loss": 3.479225109965653, "tokens_seen": 1863843840 }, { "epoch": 0.56, "learning_rate": 0.00043949606804686246, "loss": 0.0706, "theoretical_loss": 3.4791840839466435, "tokens_seen": 1864105984 }, { "epoch": 0.56, "learning_rate": 0.0004394158241052801, "loss": 0.0689, "theoretical_loss": 3.479143065311768, "tokens_seen": 1864368128 }, { "epoch": 0.57, "learning_rate": 0.00043933558016369764, "loss": 0.0668, "theoretical_loss": 3.47910205405866, "tokens_seen": 1864630272 }, { "epoch": 0.57, "learning_rate": 0.0004392553362221152, "loss": 0.0649, "theoretical_loss": 3.479061050184953, "tokens_seen": 1864892416 }, { "epoch": 0.57, "learning_rate": 0.0004391750922805328, "loss": 0.0689, "theoretical_loss": 3.4790200536882825, "tokens_seen": 1865154560 }, { "epoch": 0.57, "learning_rate": 0.0004390948483389504, "loss": 0.0687, "theoretical_loss": 3.4789790645662846, "tokens_seen": 1865416704 }, { "epoch": 0.57, "learning_rate": 0.00043901460439736803, "loss": 0.07, "theoretical_loss": 3.478938082816597, "tokens_seen": 1865678848 }, { "epoch": 0.57, "learning_rate": 0.0004389343604557856, "loss": 0.068, "theoretical_loss": 3.4788971084368576, "tokens_seen": 1865940992 }, { "epoch": 0.57, "learning_rate": 0.0004388541165142032, "loss": 0.0706, "theoretical_loss": 3.478856141424706, "tokens_seen": 1866203136 }, { "epoch": 0.57, "learning_rate": 0.0004387738725726208, "loss": 0.0697, "theoretical_loss": 3.478815181777783, "tokens_seen": 1866465280 }, { "epoch": 0.57, "learning_rate": 0.0004386936286310383, "loss": 0.0665, "theoretical_loss": 3.4787742294937303, "tokens_seen": 1866727424 }, { "epoch": 0.57, "learning_rate": 0.00043861338468945593, "loss": 0.0688, "theoretical_loss": 3.4787332845701906, "tokens_seen": 1866989568 }, { "epoch": 0.57, "learning_rate": 0.00043853314074787354, "loss": 0.0672, "theoretical_loss": 3.4786923470048077, "tokens_seen": 1867251712 }, { "epoch": 0.57, "learning_rate": 0.00043845289680629116, "loss": 0.0699, "theoretical_loss": 3.478651416795227, "tokens_seen": 1867513856 }, { "epoch": 0.57, "objective/train/advantage_avg": -3.919263690477237e-05, "objective/train/docs_used": 681429, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3138712644577026, "objective/train/original_loss": 1.313871145248413, "objective/train/theoretical_loss": 3.478610493939094, "objective/train/tokens_used": 1888236000, "objective/train/value_avg": -0.005817413330078125, "objective/train/value_loss": 0.0002530510537326336, "objective/train/value_max": -4.792213439941406e-05, "objective/train/value_min": -0.5234375, "objective/train/value_reward_corr": 0.7415155710875803, "objective/train/value_std": 0.0134124755859375, "objective/train/weight_avg": 1.0000742673873901, "objective/train/weighted_lm_loss": 1.3138768672943115, "objective/train/weights_max": 1.5628552436828613, "objective/train/weights_min": 0.36993688344955444, "theoretical_loss": 3.478610493939094, "tokens_seen": 1867776000 }, { "epoch": 0.57, "learning_rate": 0.0004383726528647087, "loss": 0.0671, "theoretical_loss": 3.478610493939094, "tokens_seen": 1867776000 }, { "epoch": 0.57, "learning_rate": 0.00043829240892312633, "loss": 0.0688, "theoretical_loss": 3.4785695784340556, "tokens_seen": 1868038144 }, { "epoch": 0.57, "learning_rate": 0.00043821216498154394, "loss": 0.0668, "theoretical_loss": 3.4785286702777602, "tokens_seen": 1868300288 }, { "epoch": 0.57, "learning_rate": 0.00043813192103996145, "loss": 0.0661, "theoretical_loss": 3.4784877694678573, "tokens_seen": 1868562432 }, { "epoch": 0.57, "learning_rate": 0.00043805167709837906, "loss": 0.0696, "theoretical_loss": 3.478446876001997, "tokens_seen": 1868824576 }, { "epoch": 0.57, "learning_rate": 0.00043797143315679667, "loss": 0.0703, "theoretical_loss": 3.4784059898778312, "tokens_seen": 1869086720 }, { "epoch": 0.57, "learning_rate": 0.0004378911892152143, "loss": 0.0695, "theoretical_loss": 3.4783651110930123, "tokens_seen": 1869348864 }, { "epoch": 0.57, "learning_rate": 0.00043781094527363184, "loss": 0.0697, "theoretical_loss": 3.478324239645193, "tokens_seen": 1869611008 }, { "epoch": 0.57, "learning_rate": 0.00043773070133204945, "loss": 0.0714, "theoretical_loss": 3.478283375532029, "tokens_seen": 1869873152 }, { "epoch": 0.57, "learning_rate": 0.00043765045739046707, "loss": 0.07, "theoretical_loss": 3.4782425187511756, "tokens_seen": 1870135296 }, { "epoch": 0.57, "learning_rate": 0.00043757021344888457, "loss": 0.0687, "theoretical_loss": 3.47820166930029, "tokens_seen": 1870397440 }, { "epoch": 0.57, "learning_rate": 0.0004374899695073022, "loss": 0.0675, "theoretical_loss": 3.47816082717703, "tokens_seen": 1870659584 }, { "epoch": 0.57, "learning_rate": 0.0004374097255657198, "loss": 0.0664, "theoretical_loss": 3.478119992379054, "tokens_seen": 1870921728 }, { "epoch": 0.57, "learning_rate": 0.0004373294816241374, "loss": 0.0721, "theoretical_loss": 3.478079164904022, "tokens_seen": 1871183872 }, { "epoch": 0.57, "learning_rate": 0.00043724923768255497, "loss": 0.071, "theoretical_loss": 3.4780383447495966, "tokens_seen": 1871446016 }, { "epoch": 0.57, "learning_rate": 0.0004371689937409726, "loss": 0.0667, "theoretical_loss": 3.477997531913439, "tokens_seen": 1871708160 }, { "epoch": 0.57, "learning_rate": 0.0004370887497993902, "loss": 0.0716, "theoretical_loss": 3.477956726393212, "tokens_seen": 1871970304 }, { "epoch": 0.57, "learning_rate": 0.00043700850585780775, "loss": 0.0702, "theoretical_loss": 3.477915928186581, "tokens_seen": 1872232448 }, { "epoch": 0.57, "learning_rate": 0.0004369282619162253, "loss": 0.0696, "theoretical_loss": 3.4778751372912105, "tokens_seen": 1872494592 }, { "epoch": 0.57, "learning_rate": 0.0004368480179746429, "loss": 0.0682, "theoretical_loss": 3.4778343537047673, "tokens_seen": 1872756736 }, { "epoch": 0.57, "learning_rate": 0.0004367677740330605, "loss": 0.0688, "theoretical_loss": 3.4777935774249196, "tokens_seen": 1873018880 }, { "epoch": 0.57, "learning_rate": 0.0004366875300914781, "loss": 0.0706, "theoretical_loss": 3.4777528084493348, "tokens_seen": 1873281024 }, { "epoch": 0.57, "learning_rate": 0.0004366072861498957, "loss": 0.0682, "theoretical_loss": 3.477712046775684, "tokens_seen": 1873543168 }, { "epoch": 0.57, "learning_rate": 0.0004365270422083133, "loss": 0.0684, "theoretical_loss": 3.477671292401637, "tokens_seen": 1873805312 }, { "epoch": 0.57, "learning_rate": 0.0004364467982667309, "loss": 0.068, "theoretical_loss": 3.477630545324866, "tokens_seen": 1874067456 }, { "epoch": 0.57, "objective/train/advantage_avg": -0.00036099160206504166, "objective/train/docs_used": 683764, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4436589479446411, "objective/train/original_loss": 1.4436590671539307, "objective/train/theoretical_loss": 3.477589805543044, "objective/train/tokens_used": 1894789600, "objective/train/value_avg": -0.00724029541015625, "objective/train/value_loss": 0.00032180329435504973, "objective/train/value_max": -3.8504600524902344e-05, "objective/train/value_min": -0.9072265625, "objective/train/value_reward_corr": 0.6553256771569635, "objective/train/value_std": 0.01497650146484375, "objective/train/weight_avg": 0.9997800588607788, "objective/train/weighted_lm_loss": 1.4435096979141235, "objective/train/weights_max": 1.7920018434524536, "objective/train/weights_min": 0.3784064054489136, "theoretical_loss": 3.477589805543044, "tokens_seen": 1874329600 }, { "epoch": 0.57, "learning_rate": 0.00043636655432514843, "loss": 0.0707, "theoretical_loss": 3.477589805543044, "tokens_seen": 1874329600 }, { "epoch": 0.57, "learning_rate": 0.00043628631038356605, "loss": 0.072, "theoretical_loss": 3.477549073053845, "tokens_seen": 1874591744 }, { "epoch": 0.57, "learning_rate": 0.0004362060664419836, "loss": 0.0702, "theoretical_loss": 3.477508347854944, "tokens_seen": 1874853888 }, { "epoch": 0.57, "learning_rate": 0.0004361258225004012, "loss": 0.0688, "theoretical_loss": 3.477467629944017, "tokens_seen": 1875116032 }, { "epoch": 0.57, "learning_rate": 0.00043604557855881883, "loss": 0.0667, "theoretical_loss": 3.4774269193187406, "tokens_seen": 1875378176 }, { "epoch": 0.57, "learning_rate": 0.00043596533461723644, "loss": 0.0697, "theoretical_loss": 3.4773862159767943, "tokens_seen": 1875640320 }, { "epoch": 0.57, "learning_rate": 0.000435885090675654, "loss": 0.0696, "theoretical_loss": 3.4773455199158567, "tokens_seen": 1875902464 }, { "epoch": 0.57, "learning_rate": 0.0004358048467340716, "loss": 0.0718, "theoretical_loss": 3.4773048311336083, "tokens_seen": 1876164608 }, { "epoch": 0.57, "learning_rate": 0.0004357246027924892, "loss": 0.0682, "theoretical_loss": 3.4772641496277306, "tokens_seen": 1876426752 }, { "epoch": 0.57, "learning_rate": 0.00043564435885090673, "loss": 0.0674, "theoretical_loss": 3.4772234753959057, "tokens_seen": 1876688896 }, { "epoch": 0.57, "learning_rate": 0.00043556411490932434, "loss": 0.0691, "theoretical_loss": 3.477182808435818, "tokens_seen": 1876951040 }, { "epoch": 0.57, "learning_rate": 0.00043548387096774196, "loss": 0.0676, "theoretical_loss": 3.477142148745151, "tokens_seen": 1877213184 }, { "epoch": 0.57, "learning_rate": 0.00043540362702615957, "loss": 0.07, "theoretical_loss": 3.477101496321591, "tokens_seen": 1877475328 }, { "epoch": 0.57, "learning_rate": 0.00043532338308457713, "loss": 0.0734, "theoretical_loss": 3.4770608511628254, "tokens_seen": 1877737472 }, { "epoch": 0.57, "learning_rate": 0.00043524313914299474, "loss": 0.0724, "theoretical_loss": 3.477020213266541, "tokens_seen": 1877999616 }, { "epoch": 0.57, "learning_rate": 0.0004351628952014123, "loss": 0.0716, "theoretical_loss": 3.476979582630427, "tokens_seen": 1878261760 }, { "epoch": 0.57, "learning_rate": 0.00043508265125982986, "loss": 0.0701, "theoretical_loss": 3.4769389592521733, "tokens_seen": 1878523904 }, { "epoch": 0.57, "learning_rate": 0.00043500240731824747, "loss": 0.0721, "theoretical_loss": 3.476898343129471, "tokens_seen": 1878786048 }, { "epoch": 0.57, "learning_rate": 0.0004349221633766651, "loss": 0.0659, "theoretical_loss": 3.476857734260012, "tokens_seen": 1879048192 }, { "epoch": 0.57, "learning_rate": 0.00043484191943508264, "loss": 0.0692, "theoretical_loss": 3.4768171326414894, "tokens_seen": 1879310336 }, { "epoch": 0.57, "learning_rate": 0.00043476167549350025, "loss": 0.0677, "theoretical_loss": 3.4767765382715976, "tokens_seen": 1879572480 }, { "epoch": 0.57, "learning_rate": 0.00043468143155191787, "loss": 0.0697, "theoretical_loss": 3.4767359511480316, "tokens_seen": 1879834624 }, { "epoch": 0.57, "learning_rate": 0.0004346011876103354, "loss": 0.0692, "theoretical_loss": 3.4766953712684874, "tokens_seen": 1880096768 }, { "epoch": 0.57, "learning_rate": 0.000434520943668753, "loss": 0.0678, "theoretical_loss": 3.476654798630663, "tokens_seen": 1880358912 }, { "epoch": 0.57, "learning_rate": 0.0004344406997271706, "loss": 0.0683, "theoretical_loss": 3.476614233232256, "tokens_seen": 1880621056 }, { "epoch": 0.57, "objective/train/advantage_avg": -0.0004898487823083997, "objective/train/docs_used": 686045, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3733599185943604, "objective/train/original_loss": 1.3733599185943604, "objective/train/theoretical_loss": 3.476573675070966, "objective/train/tokens_used": 1901343200, "objective/train/value_avg": -0.0057830810546875, "objective/train/value_loss": 0.00014441322127822787, "objective/train/value_max": -3.510713577270508e-05, "objective/train/value_min": -0.3486328125, "objective/train/value_reward_corr": 0.7300531562767005, "objective/train/value_std": 0.010986328125, "objective/train/weight_avg": 0.9995789527893066, "objective/train/weighted_lm_loss": 1.3724981546401978, "objective/train/weights_max": 1.1518362760543823, "objective/train/weights_min": 0.36823925375938416, "theoretical_loss": 3.476573675070966, "tokens_seen": 1880883200 }, { "epoch": 0.57, "learning_rate": 0.0004343604557855882, "loss": 0.0674, "theoretical_loss": 3.476573675070966, "tokens_seen": 1880883200 }, { "epoch": 0.57, "learning_rate": 0.00043428021184400577, "loss": 0.0704, "theoretical_loss": 3.4765331241444937, "tokens_seen": 1881145344 }, { "epoch": 0.57, "learning_rate": 0.0004341999679024234, "loss": 0.0665, "theoretical_loss": 3.4764925804505404, "tokens_seen": 1881407488 }, { "epoch": 0.57, "learning_rate": 0.000434119723960841, "loss": 0.067, "theoretical_loss": 3.476452043986809, "tokens_seen": 1881669632 }, { "epoch": 0.57, "learning_rate": 0.0004340394800192586, "loss": 0.067, "theoretical_loss": 3.4764115147510033, "tokens_seen": 1881931776 }, { "epoch": 0.57, "learning_rate": 0.0004339592360776761, "loss": 0.0705, "theoretical_loss": 3.4763709927408266, "tokens_seen": 1882193920 }, { "epoch": 0.57, "learning_rate": 0.0004338789921360937, "loss": 0.0678, "theoretical_loss": 3.4763304779539865, "tokens_seen": 1882456064 }, { "epoch": 0.57, "learning_rate": 0.00043379874819451133, "loss": 0.0713, "theoretical_loss": 3.476289970388188, "tokens_seen": 1882718208 }, { "epoch": 0.57, "learning_rate": 0.0004337185042529289, "loss": 0.0694, "theoretical_loss": 3.4762494700411404, "tokens_seen": 1882980352 }, { "epoch": 0.57, "learning_rate": 0.0004336382603113465, "loss": 0.0681, "theoretical_loss": 3.476208976910552, "tokens_seen": 1883242496 }, { "epoch": 0.57, "learning_rate": 0.0004335580163697641, "loss": 0.0688, "theoretical_loss": 3.476168490994132, "tokens_seen": 1883504640 }, { "epoch": 0.57, "learning_rate": 0.00043347777242818173, "loss": 0.0692, "theoretical_loss": 3.4761280122895926, "tokens_seen": 1883766784 }, { "epoch": 0.57, "learning_rate": 0.00043339752848659923, "loss": 0.0704, "theoretical_loss": 3.476087540794645, "tokens_seen": 1884028928 }, { "epoch": 0.57, "learning_rate": 0.00043331728454501685, "loss": 0.0708, "theoretical_loss": 3.476047076507002, "tokens_seen": 1884291072 }, { "epoch": 0.57, "learning_rate": 0.00043323704060343446, "loss": 0.0687, "theoretical_loss": 3.4760066194243784, "tokens_seen": 1884553216 }, { "epoch": 0.57, "learning_rate": 0.000433156796661852, "loss": 0.0729, "theoretical_loss": 3.4759661695444892, "tokens_seen": 1884815360 }, { "epoch": 0.57, "learning_rate": 0.00043307655272026963, "loss": 0.071, "theoretical_loss": 3.47592572686505, "tokens_seen": 1885077504 }, { "epoch": 0.57, "learning_rate": 0.00043299630877868724, "loss": 0.071, "theoretical_loss": 3.4758852913837783, "tokens_seen": 1885339648 }, { "epoch": 0.57, "learning_rate": 0.0004329160648371048, "loss": 0.0671, "theoretical_loss": 3.475844863098393, "tokens_seen": 1885601792 }, { "epoch": 0.57, "learning_rate": 0.00043283582089552236, "loss": 0.0684, "theoretical_loss": 3.475804442006612, "tokens_seen": 1885863936 }, { "epoch": 0.57, "learning_rate": 0.00043275557695393997, "loss": 0.0718, "theoretical_loss": 3.4757640281061573, "tokens_seen": 1886126080 }, { "epoch": 0.57, "learning_rate": 0.0004326753330123576, "loss": 0.0693, "theoretical_loss": 3.4757236213947484, "tokens_seen": 1886388224 }, { "epoch": 0.57, "learning_rate": 0.00043259508907077514, "loss": 0.0689, "theoretical_loss": 3.4756832218701095, "tokens_seen": 1886650368 }, { "epoch": 0.57, "learning_rate": 0.00043251484512919276, "loss": 0.0684, "theoretical_loss": 3.475642829529963, "tokens_seen": 1886912512 }, { "epoch": 0.57, "learning_rate": 0.00043243460118761037, "loss": 0.0686, "theoretical_loss": 3.4756024443720337, "tokens_seen": 1887174656 }, { "epoch": 0.57, "objective/train/advantage_avg": -0.0003805331070907414, "objective/train/docs_used": 688313, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3872874975204468, "objective/train/original_loss": 1.3872874975204468, "objective/train/theoretical_loss": 3.4755620663940467, "objective/train/tokens_used": 1907896800, "objective/train/value_avg": -0.00792694091796875, "objective/train/value_loss": 0.0004488099948503077, "objective/train/value_max": -4.988908767700195e-05, "objective/train/value_min": -0.88671875, "objective/train/value_reward_corr": 0.72679213095778, "objective/train/value_std": 0.021087646484375, "objective/train/weight_avg": 0.9998147487640381, "objective/train/weighted_lm_loss": 1.3864668607711792, "objective/train/weights_max": 1.4310088157653809, "objective/train/weights_min": 0.3778928816318512, "theoretical_loss": 3.4755620663940467, "tokens_seen": 1887436800 }, { "epoch": 0.57, "learning_rate": 0.00043235435724602793, "loss": 0.0692, "theoretical_loss": 3.4755620663940467, "tokens_seen": 1887436800 }, { "epoch": 0.57, "learning_rate": 0.00043227411330444554, "loss": 0.0712, "theoretical_loss": 3.4755216955937294, "tokens_seen": 1887698944 }, { "epoch": 0.57, "learning_rate": 0.0004321938693628631, "loss": 0.0672, "theoretical_loss": 3.4754813319688087, "tokens_seen": 1887961088 }, { "epoch": 0.57, "learning_rate": 0.0004321136254212807, "loss": 0.0683, "theoretical_loss": 3.4754409755170133, "tokens_seen": 1888223232 }, { "epoch": 0.57, "learning_rate": 0.00043203338147969827, "loss": 0.0685, "theoretical_loss": 3.4754006262360733, "tokens_seen": 1888485376 }, { "epoch": 0.57, "learning_rate": 0.0004319531375381159, "loss": 0.0638, "theoretical_loss": 3.4753602841237186, "tokens_seen": 1888747520 }, { "epoch": 0.57, "learning_rate": 0.0004318728935965335, "loss": 0.0671, "theoretical_loss": 3.4753199491776816, "tokens_seen": 1889009664 }, { "epoch": 0.57, "learning_rate": 0.00043179264965495105, "loss": 0.0673, "theoretical_loss": 3.475279621395695, "tokens_seen": 1889271808 }, { "epoch": 0.57, "learning_rate": 0.00043171240571336867, "loss": 0.068, "theoretical_loss": 3.4752393007754923, "tokens_seen": 1889533952 }, { "epoch": 0.57, "learning_rate": 0.0004316321617717862, "loss": 0.0694, "theoretical_loss": 3.4751989873148084, "tokens_seen": 1889796096 }, { "epoch": 0.57, "learning_rate": 0.00043155191783020384, "loss": 0.0716, "theoretical_loss": 3.4751586810113793, "tokens_seen": 1890058240 }, { "epoch": 0.57, "learning_rate": 0.0004314716738886214, "loss": 0.0668, "theoretical_loss": 3.4751183818629414, "tokens_seen": 1890320384 }, { "epoch": 0.57, "learning_rate": 0.000431391429947039, "loss": 0.069, "theoretical_loss": 3.4750780898672335, "tokens_seen": 1890582528 }, { "epoch": 0.57, "learning_rate": 0.0004313111860054566, "loss": 0.0654, "theoretical_loss": 3.475037805021994, "tokens_seen": 1890844672 }, { "epoch": 0.57, "learning_rate": 0.0004312309420638742, "loss": 0.0672, "theoretical_loss": 3.4749975273249625, "tokens_seen": 1891106816 }, { "epoch": 0.57, "learning_rate": 0.0004311506981222918, "loss": 0.0695, "theoretical_loss": 3.4749572567738807, "tokens_seen": 1891368960 }, { "epoch": 0.57, "learning_rate": 0.0004310704541807094, "loss": 0.0693, "theoretical_loss": 3.4749169933664903, "tokens_seen": 1891631104 }, { "epoch": 0.57, "learning_rate": 0.0004309902102391269, "loss": 0.0671, "theoretical_loss": 3.4748767371005345, "tokens_seen": 1891893248 }, { "epoch": 0.57, "learning_rate": 0.0004309099662975445, "loss": 0.0684, "theoretical_loss": 3.474836487973757, "tokens_seen": 1892155392 }, { "epoch": 0.57, "learning_rate": 0.00043082972235596213, "loss": 0.0699, "theoretical_loss": 3.474796245983903, "tokens_seen": 1892417536 }, { "epoch": 0.57, "learning_rate": 0.00043074947841437975, "loss": 0.0703, "theoretical_loss": 3.4747560111287195, "tokens_seen": 1892679680 }, { "epoch": 0.57, "learning_rate": 0.0004306692344727973, "loss": 0.0673, "theoretical_loss": 3.4747157834059523, "tokens_seen": 1892941824 }, { "epoch": 0.57, "learning_rate": 0.0004305889905312149, "loss": 0.0679, "theoretical_loss": 3.47467556281335, "tokens_seen": 1893203968 }, { "epoch": 0.57, "learning_rate": 0.00043050874658963253, "loss": 0.0699, "theoretical_loss": 3.474635349348662, "tokens_seen": 1893466112 }, { "epoch": 0.57, "learning_rate": 0.00043042850264805003, "loss": 0.0683, "theoretical_loss": 3.4745951430096387, "tokens_seen": 1893728256 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.0003969900426454842, "objective/train/docs_used": 690600, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.344144344329834, "objective/train/original_loss": 1.344144344329834, "objective/train/theoretical_loss": 3.474554943794031, "objective/train/tokens_used": 1914450400, "objective/train/value_avg": -0.005825042724609375, "objective/train/value_loss": 0.00022731033095624298, "objective/train/value_max": -3.7610530853271484e-05, "objective/train/value_min": -0.96142578125, "objective/train/value_reward_corr": 0.781940150301403, "objective/train/value_std": 0.0180816650390625, "objective/train/weight_avg": 1.0004974603652954, "objective/train/weighted_lm_loss": 1.3450180292129517, "objective/train/weights_max": 1.4332808256149292, "objective/train/weights_min": 0.3768678903579712, "theoretical_loss": 3.474554943794031, "tokens_seen": 1893990400 }, { "epoch": 0.57, "learning_rate": 0.00043034825870646765, "loss": 0.0688, "theoretical_loss": 3.474554943794031, "tokens_seen": 1893990400 }, { "epoch": 0.57, "learning_rate": 0.00043026801476488526, "loss": 0.0697, "theoretical_loss": 3.4745147516995916, "tokens_seen": 1894252544 }, { "epoch": 0.57, "learning_rate": 0.00043018777082330287, "loss": 0.0688, "theoretical_loss": 3.474474566724073, "tokens_seen": 1894514688 }, { "epoch": 0.57, "learning_rate": 0.00043010752688172043, "loss": 0.0692, "theoretical_loss": 3.4744343888652303, "tokens_seen": 1894776832 }, { "epoch": 0.57, "learning_rate": 0.00043002728294013804, "loss": 0.0664, "theoretical_loss": 3.474394218120818, "tokens_seen": 1895038976 }, { "epoch": 0.57, "learning_rate": 0.00042994703899855565, "loss": 0.0689, "theoretical_loss": 3.474354054488593, "tokens_seen": 1895301120 }, { "epoch": 0.57, "learning_rate": 0.00042986679505697316, "loss": 0.0679, "theoretical_loss": 3.4743138979663133, "tokens_seen": 1895563264 }, { "epoch": 0.57, "learning_rate": 0.00042978655111539077, "loss": 0.072, "theoretical_loss": 3.4742737485517354, "tokens_seen": 1895825408 }, { "epoch": 0.57, "learning_rate": 0.0004297063071738084, "loss": 0.0683, "theoretical_loss": 3.4742336062426205, "tokens_seen": 1896087552 }, { "epoch": 0.57, "learning_rate": 0.000429626063232226, "loss": 0.0682, "theoretical_loss": 3.4741934710367284, "tokens_seen": 1896349696 }, { "epoch": 0.57, "learning_rate": 0.00042954581929064356, "loss": 0.0695, "theoretical_loss": 3.47415334293182, "tokens_seen": 1896611840 }, { "epoch": 0.57, "learning_rate": 0.00042946557534906117, "loss": 0.0684, "theoretical_loss": 3.4741132219256583, "tokens_seen": 1896873984 }, { "epoch": 0.57, "learning_rate": 0.0004293853314074788, "loss": 0.0688, "theoretical_loss": 3.4740731080160066, "tokens_seen": 1897136128 }, { "epoch": 0.57, "learning_rate": 0.00042930508746589634, "loss": 0.0718, "theoretical_loss": 3.4740330012006293, "tokens_seen": 1897398272 }, { "epoch": 0.58, "learning_rate": 0.0004292248435243139, "loss": 0.0682, "theoretical_loss": 3.473992901477292, "tokens_seen": 1897660416 }, { "epoch": 0.58, "learning_rate": 0.0004291445995827315, "loss": 0.0673, "theoretical_loss": 3.4739528088437606, "tokens_seen": 1897922560 }, { "epoch": 0.58, "learning_rate": 0.0004290643556411491, "loss": 0.0673, "theoretical_loss": 3.4739127232978033, "tokens_seen": 1898184704 }, { "epoch": 0.58, "learning_rate": 0.0004289841116995667, "loss": 0.0657, "theoretical_loss": 3.473872644837189, "tokens_seen": 1898446848 }, { "epoch": 0.58, "learning_rate": 0.0004289038677579843, "loss": 0.0703, "theoretical_loss": 3.4738325734596858, "tokens_seen": 1898708992 }, { "epoch": 0.58, "learning_rate": 0.0004288236238164019, "loss": 0.0652, "theoretical_loss": 3.473792509163066, "tokens_seen": 1898971136 }, { "epoch": 0.58, "learning_rate": 0.00042874337987481946, "loss": 0.068, "theoretical_loss": 3.4737524519450993, "tokens_seen": 1899233280 }, { "epoch": 0.58, "learning_rate": 0.000428663135933237, "loss": 0.068, "theoretical_loss": 3.4737124018035597, "tokens_seen": 1899495424 }, { "epoch": 0.58, "learning_rate": 0.00042858289199165464, "loss": 0.0677, "theoretical_loss": 3.47367235873622, "tokens_seen": 1899757568 }, { "epoch": 0.58, "learning_rate": 0.0004285026480500722, "loss": 0.0677, "theoretical_loss": 3.4736323227408548, "tokens_seen": 1900019712 }, { "epoch": 0.58, "learning_rate": 0.0004284224041084898, "loss": 0.0687, "theoretical_loss": 3.4735922938152397, "tokens_seen": 1900281856 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.000148945051478222, "objective/train/docs_used": 693007, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3303676843643188, "objective/train/original_loss": 1.3303678035736084, "objective/train/theoretical_loss": 3.4735522719571517, "objective/train/tokens_used": 1921004000, "objective/train/value_avg": -0.00881195068359375, "objective/train/value_loss": 0.0003545716463122517, "objective/train/value_max": -6.109476089477539e-05, "objective/train/value_min": -0.9736328125, "objective/train/value_reward_corr": 0.7112460178861546, "objective/train/value_std": 0.018310546875, "objective/train/weight_avg": 1.0003077983856201, "objective/train/weighted_lm_loss": 1.331317663192749, "objective/train/weights_max": 1.3738635778427124, "objective/train/weights_min": 0.3704579770565033, "theoretical_loss": 3.4735522719571517, "tokens_seen": 1900544000 }, { "epoch": 0.58, "learning_rate": 0.0004283421601669074, "loss": 0.068, "theoretical_loss": 3.4735522719571517, "tokens_seen": 1900544000 }, { "epoch": 0.58, "learning_rate": 0.00042826191622532503, "loss": 0.0671, "theoretical_loss": 3.473512257164368, "tokens_seen": 1900806144 }, { "epoch": 0.58, "learning_rate": 0.0004281816722837426, "loss": 0.068, "theoretical_loss": 3.4734722494346673, "tokens_seen": 1901068288 }, { "epoch": 0.58, "learning_rate": 0.00042810142834216015, "loss": 0.0685, "theoretical_loss": 3.473432248765829, "tokens_seen": 1901330432 }, { "epoch": 0.58, "learning_rate": 0.00042802118440057776, "loss": 0.0694, "theoretical_loss": 3.473392255155634, "tokens_seen": 1901592576 }, { "epoch": 0.58, "learning_rate": 0.0004279409404589953, "loss": 0.0694, "theoretical_loss": 3.4733522686018636, "tokens_seen": 1901854720 }, { "epoch": 0.58, "learning_rate": 0.00042786069651741293, "loss": 0.0696, "theoretical_loss": 3.4733122891023007, "tokens_seen": 1902116864 }, { "epoch": 0.58, "learning_rate": 0.00042778045257583054, "loss": 0.0681, "theoretical_loss": 3.473272316654729, "tokens_seen": 1902379008 }, { "epoch": 0.58, "learning_rate": 0.00042770020863424816, "loss": 0.0712, "theoretical_loss": 3.4732323512569323, "tokens_seen": 1902641152 }, { "epoch": 0.58, "learning_rate": 0.0004276199646926657, "loss": 0.068, "theoretical_loss": 3.473192392906697, "tokens_seen": 1902903296 }, { "epoch": 0.58, "learning_rate": 0.00042753972075108333, "loss": 0.0689, "theoretical_loss": 3.473152441601809, "tokens_seen": 1903165440 }, { "epoch": 0.58, "learning_rate": 0.0004274594768095009, "loss": 0.0723, "theoretical_loss": 3.473112497340057, "tokens_seen": 1903427584 }, { "epoch": 0.58, "learning_rate": 0.00042737923286791845, "loss": 0.0686, "theoretical_loss": 3.473072560119229, "tokens_seen": 1903689728 }, { "epoch": 0.58, "learning_rate": 0.00042729898892633606, "loss": 0.0701, "theoretical_loss": 3.473032629937114, "tokens_seen": 1903951872 }, { "epoch": 0.58, "learning_rate": 0.00042721874498475367, "loss": 0.068, "theoretical_loss": 3.472992706791504, "tokens_seen": 1904214016 }, { "epoch": 0.58, "learning_rate": 0.0004271385010431713, "loss": 0.0688, "theoretical_loss": 3.472952790680189, "tokens_seen": 1904476160 }, { "epoch": 0.58, "learning_rate": 0.00042705825710158884, "loss": 0.0689, "theoretical_loss": 3.472912881600963, "tokens_seen": 1904738304 }, { "epoch": 0.58, "learning_rate": 0.00042697801316000645, "loss": 0.0688, "theoretical_loss": 3.4728729795516182, "tokens_seen": 1905000448 }, { "epoch": 0.58, "learning_rate": 0.000426897769218424, "loss": 0.0697, "theoretical_loss": 3.4728330845299507, "tokens_seen": 1905262592 }, { "epoch": 0.58, "learning_rate": 0.00042681752527684157, "loss": 0.0694, "theoretical_loss": 3.472793196533755, "tokens_seen": 1905524736 }, { "epoch": 0.58, "learning_rate": 0.0004267372813352592, "loss": 0.0704, "theoretical_loss": 3.472753315560828, "tokens_seen": 1905786880 }, { "epoch": 0.58, "learning_rate": 0.0004266570373936768, "loss": 0.0673, "theoretical_loss": 3.4727134416089678, "tokens_seen": 1906049024 }, { "epoch": 0.58, "learning_rate": 0.00042657679345209435, "loss": 0.0713, "theoretical_loss": 3.472673574675972, "tokens_seen": 1906311168 }, { "epoch": 0.58, "learning_rate": 0.00042649654951051197, "loss": 0.0667, "theoretical_loss": 3.472633714759641, "tokens_seen": 1906573312 }, { "epoch": 0.58, "learning_rate": 0.0004264163055689296, "loss": 0.0695, "theoretical_loss": 3.4725938618577743, "tokens_seen": 1906835456 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.0010231251362711191, "objective/train/docs_used": 695522, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.331170678138733, "objective/train/original_loss": 1.3311705589294434, "objective/train/theoretical_loss": 3.4725540159681745, "objective/train/tokens_used": 1927557600, "objective/train/value_avg": -0.0065460205078125, "objective/train/value_loss": 0.0002442924596834928, "objective/train/value_max": -6.35385513305664e-05, "objective/train/value_min": -0.9287109375, "objective/train/value_reward_corr": 0.7102165433933779, "objective/train/value_std": 0.016693115234375, "objective/train/weight_avg": 1.0011364221572876, "objective/train/weighted_lm_loss": 1.3334146738052368, "objective/train/weights_max": 1.9407720565795898, "objective/train/weights_min": 0.368646502494812, "theoretical_loss": 3.4725540159681745, "tokens_seen": 1907097600 }, { "epoch": 0.58, "learning_rate": 0.0004263360616273472, "loss": 0.069, "theoretical_loss": 3.4725540159681745, "tokens_seen": 1907097600 }, { "epoch": 0.58, "learning_rate": 0.0004262558176857647, "loss": 0.0669, "theoretical_loss": 3.4725141770886436, "tokens_seen": 1907359744 }, { "epoch": 0.58, "learning_rate": 0.0004261755737441823, "loss": 0.0666, "theoretical_loss": 3.4724743452169857, "tokens_seen": 1907621888 }, { "epoch": 0.58, "learning_rate": 0.0004260953298025999, "loss": 0.0678, "theoretical_loss": 3.4724345203510047, "tokens_seen": 1907884032 }, { "epoch": 0.58, "learning_rate": 0.0004260150858610175, "loss": 0.0679, "theoretical_loss": 3.472394702488506, "tokens_seen": 1908146176 }, { "epoch": 0.58, "learning_rate": 0.0004259348419194351, "loss": 0.0672, "theoretical_loss": 3.4723548916272966, "tokens_seen": 1908408320 }, { "epoch": 0.58, "learning_rate": 0.0004258545979778527, "loss": 0.0693, "theoretical_loss": 3.4723150877651836, "tokens_seen": 1908670464 }, { "epoch": 0.58, "learning_rate": 0.0004257743540362703, "loss": 0.0681, "theoretical_loss": 3.472275290899976, "tokens_seen": 1908932608 }, { "epoch": 0.58, "learning_rate": 0.0004256941100946878, "loss": 0.0689, "theoretical_loss": 3.472235501029483, "tokens_seen": 1909194752 }, { "epoch": 0.58, "learning_rate": 0.00042561386615310543, "loss": 0.0699, "theoretical_loss": 3.4721957181515144, "tokens_seen": 1909456896 }, { "epoch": 0.58, "learning_rate": 0.00042553362221152305, "loss": 0.0694, "theoretical_loss": 3.472155942263883, "tokens_seen": 1909719040 }, { "epoch": 0.58, "learning_rate": 0.0004254533782699406, "loss": 0.07, "theoretical_loss": 3.4721161733643995, "tokens_seen": 1909981184 }, { "epoch": 0.58, "learning_rate": 0.0004253731343283582, "loss": 0.0675, "theoretical_loss": 3.472076411450878, "tokens_seen": 1910243328 }, { "epoch": 0.58, "learning_rate": 0.00042529289038677583, "loss": 0.0679, "theoretical_loss": 3.472036656521134, "tokens_seen": 1910505472 }, { "epoch": 0.58, "learning_rate": 0.00042521264644519344, "loss": 0.0662, "theoretical_loss": 3.4719969085729816, "tokens_seen": 1910767616 }, { "epoch": 0.58, "learning_rate": 0.00042513240250361095, "loss": 0.0717, "theoretical_loss": 3.4719571676042373, "tokens_seen": 1911029760 }, { "epoch": 0.58, "learning_rate": 0.00042505215856202856, "loss": 0.0708, "theoretical_loss": 3.4719174336127185, "tokens_seen": 1911291904 }, { "epoch": 0.58, "learning_rate": 0.0004249719146204462, "loss": 0.0691, "theoretical_loss": 3.471877706596244, "tokens_seen": 1911554048 }, { "epoch": 0.58, "learning_rate": 0.00042489167067886373, "loss": 0.0669, "theoretical_loss": 3.4718379865526323, "tokens_seen": 1911816192 }, { "epoch": 0.58, "learning_rate": 0.00042481142673728134, "loss": 0.0705, "theoretical_loss": 3.4717982734797044, "tokens_seen": 1912078336 }, { "epoch": 0.58, "learning_rate": 0.00042473118279569896, "loss": 0.0695, "theoretical_loss": 3.4717585673752813, "tokens_seen": 1912340480 }, { "epoch": 0.58, "learning_rate": 0.0004246509388541165, "loss": 0.0692, "theoretical_loss": 3.4717188682371853, "tokens_seen": 1912602624 }, { "epoch": 0.58, "learning_rate": 0.00042457069491253413, "loss": 0.0678, "theoretical_loss": 3.471679176063239, "tokens_seen": 1912864768 }, { "epoch": 0.58, "learning_rate": 0.0004244904509709517, "loss": 0.0682, "theoretical_loss": 3.4716394908512678, "tokens_seen": 1913126912 }, { "epoch": 0.58, "learning_rate": 0.0004244102070293693, "loss": 0.0661, "theoretical_loss": 3.471599812599095, "tokens_seen": 1913389056 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.0013305656611919403, "objective/train/docs_used": 697951, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4070523977279663, "objective/train/original_loss": 1.4070522785186768, "objective/train/theoretical_loss": 3.4715601413045487, "objective/train/tokens_used": 1934111200, "objective/train/value_avg": -0.01123046875, "objective/train/value_loss": 0.0003065956407226622, "objective/train/value_max": -2.485513687133789e-05, "objective/train/value_min": -0.900390625, "objective/train/value_reward_corr": 0.7024227752945549, "objective/train/value_std": 0.018707275390625, "objective/train/weight_avg": 1.0014679431915283, "objective/train/weighted_lm_loss": 1.4086270332336426, "objective/train/weights_max": 2.3405609130859375, "objective/train/weights_min": 0.2271009087562561, "theoretical_loss": 3.4715601413045487, "tokens_seen": 1913651200 }, { "epoch": 0.58, "learning_rate": 0.00042432996308778686, "loss": 0.0712, "theoretical_loss": 3.4715601413045487, "tokens_seen": 1913651200 }, { "epoch": 0.58, "learning_rate": 0.00042424971914620447, "loss": 0.0687, "theoretical_loss": 3.4715204769654555, "tokens_seen": 1913913344 }, { "epoch": 0.58, "learning_rate": 0.0004241694752046221, "loss": 0.0694, "theoretical_loss": 3.471480819579643, "tokens_seen": 1914175488 }, { "epoch": 0.58, "learning_rate": 0.00042408923126303964, "loss": 0.0684, "theoretical_loss": 3.4714411691449403, "tokens_seen": 1914437632 }, { "epoch": 0.58, "learning_rate": 0.00042400898732145725, "loss": 0.0674, "theoretical_loss": 3.4714015256591777, "tokens_seen": 1914699776 }, { "epoch": 0.58, "learning_rate": 0.0004239287433798748, "loss": 0.0716, "theoretical_loss": 3.4713618891201863, "tokens_seen": 1914961920 }, { "epoch": 0.58, "learning_rate": 0.0004238484994382924, "loss": 0.0694, "theoretical_loss": 3.471322259525798, "tokens_seen": 1915224064 }, { "epoch": 0.58, "learning_rate": 0.00042376825549671, "loss": 0.0696, "theoretical_loss": 3.4712826368738456, "tokens_seen": 1915486208 }, { "epoch": 0.58, "learning_rate": 0.0004236880115551276, "loss": 0.0684, "theoretical_loss": 3.471243021162163, "tokens_seen": 1915748352 }, { "epoch": 0.58, "learning_rate": 0.0004236077676135452, "loss": 0.0675, "theoretical_loss": 3.4712034123885855, "tokens_seen": 1916010496 }, { "epoch": 0.58, "learning_rate": 0.00042352752367196277, "loss": 0.0661, "theoretical_loss": 3.471163810550949, "tokens_seen": 1916272640 }, { "epoch": 0.58, "learning_rate": 0.0004234472797303804, "loss": 0.0696, "theoretical_loss": 3.47112421564709, "tokens_seen": 1916534784 }, { "epoch": 0.58, "learning_rate": 0.00042336703578879794, "loss": 0.0673, "theoretical_loss": 3.4710846276748466, "tokens_seen": 1916796928 }, { "epoch": 0.58, "learning_rate": 0.00042328679184721555, "loss": 0.0682, "theoretical_loss": 3.4710450466320575, "tokens_seen": 1917059072 }, { "epoch": 0.58, "learning_rate": 0.0004232065479056331, "loss": 0.0677, "theoretical_loss": 3.471005472516562, "tokens_seen": 1917321216 }, { "epoch": 0.58, "learning_rate": 0.0004231263039640507, "loss": 0.0687, "theoretical_loss": 3.4709659053262016, "tokens_seen": 1917583360 }, { "epoch": 0.58, "learning_rate": 0.00042304606002246833, "loss": 0.0713, "theoretical_loss": 3.470926345058818, "tokens_seen": 1917845504 }, { "epoch": 0.58, "learning_rate": 0.0004229658160808859, "loss": 0.0666, "theoretical_loss": 3.4708867917122532, "tokens_seen": 1918107648 }, { "epoch": 0.58, "learning_rate": 0.0004228855721393035, "loss": 0.0696, "theoretical_loss": 3.470847245284351, "tokens_seen": 1918369792 }, { "epoch": 0.58, "learning_rate": 0.0004228053281977211, "loss": 0.067, "theoretical_loss": 3.4708077057729567, "tokens_seen": 1918631936 }, { "epoch": 0.58, "learning_rate": 0.0004227250842561386, "loss": 0.069, "theoretical_loss": 3.4707681731759155, "tokens_seen": 1918894080 }, { "epoch": 0.58, "learning_rate": 0.00042264484031455623, "loss": 0.0679, "theoretical_loss": 3.4707286474910735, "tokens_seen": 1919156224 }, { "epoch": 0.58, "learning_rate": 0.00042256459637297385, "loss": 0.0665, "theoretical_loss": 3.4706891287162787, "tokens_seen": 1919418368 }, { "epoch": 0.58, "learning_rate": 0.00042248435243139146, "loss": 0.0691, "theoretical_loss": 3.470649616849379, "tokens_seen": 1919680512 }, { "epoch": 0.58, "learning_rate": 0.000422404108489809, "loss": 0.0683, "theoretical_loss": 3.4706101118882247, "tokens_seen": 1919942656 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.0006141560152173042, "objective/train/docs_used": 700307, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4711885452270508, "objective/train/original_loss": 1.4711885452270508, "objective/train/theoretical_loss": 3.4705706138306653, "objective/train/tokens_used": 1940664800, "objective/train/value_avg": -0.005828857421875, "objective/train/value_loss": 0.0001746070192893967, "objective/train/value_max": -5.561113357543945e-05, "objective/train/value_min": -0.26611328125, "objective/train/value_reward_corr": 0.5963098498497844, "objective/train/value_std": 0.009674072265625, "objective/train/weight_avg": 1.0006877183914185, "objective/train/weighted_lm_loss": 1.4726924896240234, "objective/train/weights_max": 1.165907382965088, "objective/train/weights_min": 0.2251291126012802, "theoretical_loss": 3.4705706138306653, "tokens_seen": 1920204800 }, { "epoch": 0.58, "learning_rate": 0.00042232386454822663, "loss": 0.0705, "theoretical_loss": 3.4705706138306653, "tokens_seen": 1920204800 }, { "epoch": 0.58, "learning_rate": 0.00042224362060664424, "loss": 0.0686, "theoretical_loss": 3.4705311226745525, "tokens_seen": 1920466944 }, { "epoch": 0.58, "learning_rate": 0.00042216337666506175, "loss": 0.0679, "theoretical_loss": 3.470491638417739, "tokens_seen": 1920729088 }, { "epoch": 0.58, "learning_rate": 0.00042208313272347936, "loss": 0.0665, "theoretical_loss": 3.470452161058078, "tokens_seen": 1920991232 }, { "epoch": 0.58, "learning_rate": 0.00042200288878189697, "loss": 0.0694, "theoretical_loss": 3.470412690593423, "tokens_seen": 1921253376 }, { "epoch": 0.58, "learning_rate": 0.0004219226448403146, "loss": 0.0665, "theoretical_loss": 3.4703732270216303, "tokens_seen": 1921515520 }, { "epoch": 0.58, "learning_rate": 0.00042184240089873214, "loss": 0.0671, "theoretical_loss": 3.470333770340555, "tokens_seen": 1921777664 }, { "epoch": 0.58, "learning_rate": 0.00042176215695714976, "loss": 0.0682, "theoretical_loss": 3.4702943205480548, "tokens_seen": 1922039808 }, { "epoch": 0.58, "learning_rate": 0.00042168191301556737, "loss": 0.0678, "theoretical_loss": 3.470254877641988, "tokens_seen": 1922301952 }, { "epoch": 0.58, "learning_rate": 0.00042160166907398493, "loss": 0.0687, "theoretical_loss": 3.470215441620213, "tokens_seen": 1922564096 }, { "epoch": 0.58, "learning_rate": 0.0004215214251324025, "loss": 0.0715, "theoretical_loss": 3.47017601248059, "tokens_seen": 1922826240 }, { "epoch": 0.58, "learning_rate": 0.0004214411811908201, "loss": 0.0679, "theoretical_loss": 3.4701365902209798, "tokens_seen": 1923088384 }, { "epoch": 0.58, "learning_rate": 0.0004213609372492377, "loss": 0.0708, "theoretical_loss": 3.4700971748392453, "tokens_seen": 1923350528 }, { "epoch": 0.58, "learning_rate": 0.00042128069330765527, "loss": 0.0678, "theoretical_loss": 3.470057766333248, "tokens_seen": 1923612672 }, { "epoch": 0.58, "learning_rate": 0.0004212004493660729, "loss": 0.0706, "theoretical_loss": 3.4700183647008522, "tokens_seen": 1923874816 }, { "epoch": 0.58, "learning_rate": 0.0004211202054244905, "loss": 0.0676, "theoretical_loss": 3.4699789699399233, "tokens_seen": 1924136960 }, { "epoch": 0.58, "learning_rate": 0.00042103996148290805, "loss": 0.0661, "theoretical_loss": 3.469939582048326, "tokens_seen": 1924399104 }, { "epoch": 0.58, "learning_rate": 0.0004209597175413256, "loss": 0.0676, "theoretical_loss": 3.469900201023928, "tokens_seen": 1924661248 }, { "epoch": 0.58, "learning_rate": 0.0004208794735997432, "loss": 0.0651, "theoretical_loss": 3.469860826864596, "tokens_seen": 1924923392 }, { "epoch": 0.58, "learning_rate": 0.0004207992296581608, "loss": 0.0689, "theoretical_loss": 3.4698214595681995, "tokens_seen": 1925185536 }, { "epoch": 0.58, "learning_rate": 0.0004207189857165784, "loss": 0.0677, "theoretical_loss": 3.4697820991326074, "tokens_seen": 1925447680 }, { "epoch": 0.58, "learning_rate": 0.000420638741774996, "loss": 0.0658, "theoretical_loss": 3.46974274555569, "tokens_seen": 1925709824 }, { "epoch": 0.58, "learning_rate": 0.0004205584978334136, "loss": 0.0701, "theoretical_loss": 3.4697033988353194, "tokens_seen": 1925971968 }, { "epoch": 0.58, "learning_rate": 0.0004204782538918312, "loss": 0.0666, "theoretical_loss": 3.4696640589693675, "tokens_seen": 1926234112 }, { "epoch": 0.58, "learning_rate": 0.00042039800995024874, "loss": 0.0681, "theoretical_loss": 3.4696247259557076, "tokens_seen": 1926496256 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.000970550230704248, "objective/train/docs_used": 702709, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4049184322357178, "objective/train/original_loss": 1.4049184322357178, "objective/train/theoretical_loss": 3.469585399792215, "objective/train/tokens_used": 1947218400, "objective/train/value_avg": -0.0082550048828125, "objective/train/value_loss": 0.00031141238287091255, "objective/train/value_max": -4.2319297790527344e-05, "objective/train/value_min": -0.669921875, "objective/train/value_reward_corr": 0.6947720833693894, "objective/train/value_std": 0.017974853515625, "objective/train/weight_avg": 1.0011149644851685, "objective/train/weighted_lm_loss": 1.406195044517517, "objective/train/weights_max": 1.8169658184051514, "objective/train/weights_min": 0.3840899169445038, "theoretical_loss": 3.469585399792215, "tokens_seen": 1926758400 }, { "epoch": 0.58, "learning_rate": 0.00042031776600866635, "loss": 0.0706, "theoretical_loss": 3.469585399792215, "tokens_seen": 1926758400 }, { "epoch": 0.58, "learning_rate": 0.0004202375220670839, "loss": 0.0666, "theoretical_loss": 3.4695460804767633, "tokens_seen": 1927020544 }, { "epoch": 0.58, "learning_rate": 0.0004201572781255015, "loss": 0.0694, "theoretical_loss": 3.4695067680072293, "tokens_seen": 1927282688 }, { "epoch": 0.58, "learning_rate": 0.00042007703418391913, "loss": 0.0709, "theoretical_loss": 3.469467462381491, "tokens_seen": 1927544832 }, { "epoch": 0.58, "learning_rate": 0.00041999679024233675, "loss": 0.0682, "theoretical_loss": 3.469428163597426, "tokens_seen": 1927806976 }, { "epoch": 0.58, "learning_rate": 0.0004199165463007543, "loss": 0.0676, "theoretical_loss": 3.4693888716529124, "tokens_seen": 1928069120 }, { "epoch": 0.58, "learning_rate": 0.0004198363023591719, "loss": 0.0706, "theoretical_loss": 3.4693495865458313, "tokens_seen": 1928331264 }, { "epoch": 0.58, "learning_rate": 0.0004197560584175895, "loss": 0.068, "theoretical_loss": 3.4693103082740633, "tokens_seen": 1928593408 }, { "epoch": 0.58, "learning_rate": 0.00041967581447600703, "loss": 0.0717, "theoretical_loss": 3.4692710368354898, "tokens_seen": 1928855552 }, { "epoch": 0.58, "learning_rate": 0.00041959557053442465, "loss": 0.0682, "theoretical_loss": 3.469231772227994, "tokens_seen": 1929117696 }, { "epoch": 0.58, "learning_rate": 0.00041951532659284226, "loss": 0.0667, "theoretical_loss": 3.4691925144494604, "tokens_seen": 1929379840 }, { "epoch": 0.58, "learning_rate": 0.00041943508265125987, "loss": 0.0715, "theoretical_loss": 3.4691532634977724, "tokens_seen": 1929641984 }, { "epoch": 0.58, "learning_rate": 0.00041935483870967743, "loss": 0.0689, "theoretical_loss": 3.469114019370816, "tokens_seen": 1929904128 }, { "epoch": 0.58, "learning_rate": 0.00041927459476809504, "loss": 0.0686, "theoretical_loss": 3.4690747820664782, "tokens_seen": 1930166272 }, { "epoch": 0.59, "learning_rate": 0.0004191943508265126, "loss": 0.0673, "theoretical_loss": 3.469035551582646, "tokens_seen": 1930428416 }, { "epoch": 0.59, "learning_rate": 0.00041911410688493016, "loss": 0.068, "theoretical_loss": 3.468996327917208, "tokens_seen": 1930690560 }, { "epoch": 0.59, "learning_rate": 0.00041903386294334777, "loss": 0.0695, "theoretical_loss": 3.468957111068054, "tokens_seen": 1930952704 }, { "epoch": 0.59, "learning_rate": 0.0004189536190017654, "loss": 0.067, "theoretical_loss": 3.468917901033074, "tokens_seen": 1931214848 }, { "epoch": 0.59, "learning_rate": 0.000418873375060183, "loss": 0.0694, "theoretical_loss": 3.468878697810159, "tokens_seen": 1931476992 }, { "epoch": 0.59, "learning_rate": 0.00041879313111860056, "loss": 0.0699, "theoretical_loss": 3.468839501397202, "tokens_seen": 1931739136 }, { "epoch": 0.59, "learning_rate": 0.00041871288717701817, "loss": 0.0701, "theoretical_loss": 3.4688003117920956, "tokens_seen": 1932001280 }, { "epoch": 0.59, "learning_rate": 0.0004186326432354357, "loss": 0.0673, "theoretical_loss": 3.4687611289927336, "tokens_seen": 1932263424 }, { "epoch": 0.59, "learning_rate": 0.0004185523992938533, "loss": 0.0684, "theoretical_loss": 3.468721952997012, "tokens_seen": 1932525568 }, { "epoch": 0.59, "learning_rate": 0.0004184721553522709, "loss": 0.0683, "theoretical_loss": 3.4686827838028256, "tokens_seen": 1932787712 }, { "epoch": 0.59, "learning_rate": 0.0004183919114106885, "loss": 0.0693, "theoretical_loss": 3.4686436214080727, "tokens_seen": 1933049856 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.0004142014076933265, "objective/train/docs_used": 705162, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3592551946640015, "objective/train/original_loss": 1.3592551946640015, "objective/train/theoretical_loss": 3.4686044658106496, "objective/train/tokens_used": 1953772000, "objective/train/value_avg": -0.00897216796875, "objective/train/value_loss": 0.00020762467465829104, "objective/train/value_max": -5.227327346801758e-05, "objective/train/value_min": -0.3525390625, "objective/train/value_reward_corr": 0.7716059997343598, "objective/train/value_std": 0.016326904296875, "objective/train/weight_avg": 1.0005117654800415, "objective/train/weighted_lm_loss": 1.3597215414047241, "objective/train/weights_max": 1.2099170684814453, "objective/train/weights_min": 0.36951375007629395, "theoretical_loss": 3.4686044658106496, "tokens_seen": 1933312000 }, { "epoch": 0.59, "learning_rate": 0.00041831166746910607, "loss": 0.0663, "theoretical_loss": 3.4686044658106496, "tokens_seen": 1933312000 }, { "epoch": 0.59, "learning_rate": 0.0004182314235275237, "loss": 0.0669, "theoretical_loss": 3.468565317008456, "tokens_seen": 1933574144 }, { "epoch": 0.59, "learning_rate": 0.0004181511795859413, "loss": 0.0717, "theoretical_loss": 3.4685261749993908, "tokens_seen": 1933836288 }, { "epoch": 0.59, "learning_rate": 0.0004180709356443589, "loss": 0.0658, "theoretical_loss": 3.4684870397813556, "tokens_seen": 1934098432 }, { "epoch": 0.59, "learning_rate": 0.0004179906917027764, "loss": 0.0698, "theoretical_loss": 3.4684479113522517, "tokens_seen": 1934360576 }, { "epoch": 0.59, "learning_rate": 0.000417910447761194, "loss": 0.0688, "theoretical_loss": 3.4684087897099816, "tokens_seen": 1934622720 }, { "epoch": 0.59, "learning_rate": 0.00041783020381961164, "loss": 0.0683, "theoretical_loss": 3.4683696748524486, "tokens_seen": 1934884864 }, { "epoch": 0.59, "learning_rate": 0.0004177499598780292, "loss": 0.0709, "theoretical_loss": 3.468330566777557, "tokens_seen": 1935147008 }, { "epoch": 0.59, "learning_rate": 0.0004176697159364468, "loss": 0.0676, "theoretical_loss": 3.4682914654832118, "tokens_seen": 1935409152 }, { "epoch": 0.59, "learning_rate": 0.0004175894719948644, "loss": 0.0651, "theoretical_loss": 3.46825237096732, "tokens_seen": 1935671296 }, { "epoch": 0.59, "learning_rate": 0.00041750922805328203, "loss": 0.0671, "theoretical_loss": 3.4682132832277883, "tokens_seen": 1935933440 }, { "epoch": 0.59, "learning_rate": 0.00041742898411169954, "loss": 0.0691, "theoretical_loss": 3.4681742022625253, "tokens_seen": 1936195584 }, { "epoch": 0.59, "learning_rate": 0.00041734874017011715, "loss": 0.0735, "theoretical_loss": 3.468135128069439, "tokens_seen": 1936457728 }, { "epoch": 0.59, "learning_rate": 0.00041726849622853476, "loss": 0.0689, "theoretical_loss": 3.4680960606464404, "tokens_seen": 1936719872 }, { "epoch": 0.59, "learning_rate": 0.0004171882522869523, "loss": 0.0692, "theoretical_loss": 3.4680569999914397, "tokens_seen": 1936982016 }, { "epoch": 0.59, "learning_rate": 0.00041710800834536993, "loss": 0.0677, "theoretical_loss": 3.468017946102349, "tokens_seen": 1937244160 }, { "epoch": 0.59, "learning_rate": 0.00041702776440378755, "loss": 0.0692, "theoretical_loss": 3.467978898977081, "tokens_seen": 1937506304 }, { "epoch": 0.59, "learning_rate": 0.00041694752046220516, "loss": 0.0699, "theoretical_loss": 3.4679398586135486, "tokens_seen": 1937768448 }, { "epoch": 0.59, "learning_rate": 0.0004168672765206227, "loss": 0.0679, "theoretical_loss": 3.467900825009668, "tokens_seen": 1938030592 }, { "epoch": 0.59, "learning_rate": 0.0004167870325790403, "loss": 0.0695, "theoretical_loss": 3.4678617981633533, "tokens_seen": 1938292736 }, { "epoch": 0.59, "learning_rate": 0.0004167067886374579, "loss": 0.0683, "theoretical_loss": 3.467822778072521, "tokens_seen": 1938554880 }, { "epoch": 0.59, "learning_rate": 0.00041662654469587545, "loss": 0.0677, "theoretical_loss": 3.4677837647350898, "tokens_seen": 1938817024 }, { "epoch": 0.59, "learning_rate": 0.00041654630075429306, "loss": 0.068, "theoretical_loss": 3.467744758148976, "tokens_seen": 1939079168 }, { "epoch": 0.59, "learning_rate": 0.00041646605681271067, "loss": 0.0686, "theoretical_loss": 3.4677057583121007, "tokens_seen": 1939341312 }, { "epoch": 0.59, "learning_rate": 0.00041638581287112823, "loss": 0.0688, "theoretical_loss": 3.467666765222383, "tokens_seen": 1939603456 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.00022620317758992314, "objective/train/docs_used": 707259, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3885008096694946, "objective/train/original_loss": 1.388500452041626, "objective/train/theoretical_loss": 3.4676277788777443, "objective/train/tokens_used": 1960325600, "objective/train/value_avg": -0.0106658935546875, "objective/train/value_loss": 0.0003325411817058921, "objective/train/value_max": -3.045797348022461e-05, "objective/train/value_min": -0.95068359375, "objective/train/value_reward_corr": 0.6915883477212278, "objective/train/value_std": 0.0175933837890625, "objective/train/weight_avg": 1.0003694295883179, "objective/train/weighted_lm_loss": 1.3884354829788208, "objective/train/weights_max": 1.4233535528182983, "objective/train/weights_min": 0.23048020899295807, "theoretical_loss": 3.4676277788777443, "tokens_seen": 1939865600 }, { "epoch": 0.59, "learning_rate": 0.00041630556892954584, "loss": 0.0682, "theoretical_loss": 3.4676277788777443, "tokens_seen": 1939865600 }, { "epoch": 0.59, "learning_rate": 0.0004162253249879634, "loss": 0.0683, "theoretical_loss": 3.467588799276106, "tokens_seen": 1940127744 }, { "epoch": 0.59, "learning_rate": 0.000416145081046381, "loss": 0.0681, "theoretical_loss": 3.4675498264153912, "tokens_seen": 1940389888 }, { "epoch": 0.59, "learning_rate": 0.00041606483710479857, "loss": 0.0693, "theoretical_loss": 3.4675108602935243, "tokens_seen": 1940652032 }, { "epoch": 0.59, "learning_rate": 0.0004159845931632162, "loss": 0.0662, "theoretical_loss": 3.4674719009084294, "tokens_seen": 1940914176 }, { "epoch": 0.59, "learning_rate": 0.0004159043492216338, "loss": 0.0691, "theoretical_loss": 3.4674329482580326, "tokens_seen": 1941176320 }, { "epoch": 0.59, "learning_rate": 0.00041582410528005135, "loss": 0.0666, "theoretical_loss": 3.46739400234026, "tokens_seen": 1941438464 }, { "epoch": 0.59, "learning_rate": 0.00041574386133846897, "loss": 0.0706, "theoretical_loss": 3.467355063153039, "tokens_seen": 1941700608 }, { "epoch": 0.59, "learning_rate": 0.0004156636173968865, "loss": 0.0647, "theoretical_loss": 3.467316130694299, "tokens_seen": 1941962752 }, { "epoch": 0.59, "learning_rate": 0.00041558337345530414, "loss": 0.0663, "theoretical_loss": 3.467277204961968, "tokens_seen": 1942224896 }, { "epoch": 0.59, "learning_rate": 0.0004155031295137217, "loss": 0.0671, "theoretical_loss": 3.467238285953977, "tokens_seen": 1942487040 }, { "epoch": 0.59, "learning_rate": 0.0004154228855721393, "loss": 0.0696, "theoretical_loss": 3.467199373668257, "tokens_seen": 1942749184 }, { "epoch": 0.59, "learning_rate": 0.0004153426416305569, "loss": 0.067, "theoretical_loss": 3.4671604681027404, "tokens_seen": 1943011328 }, { "epoch": 0.59, "learning_rate": 0.0004152623976889745, "loss": 0.0704, "theoretical_loss": 3.467121569255359, "tokens_seen": 1943273472 }, { "epoch": 0.59, "learning_rate": 0.0004151821537473921, "loss": 0.0679, "theoretical_loss": 3.4670826771240484, "tokens_seen": 1943535616 }, { "epoch": 0.59, "learning_rate": 0.0004151019098058097, "loss": 0.0711, "theoretical_loss": 3.4670437917067423, "tokens_seen": 1943797760 }, { "epoch": 0.59, "learning_rate": 0.00041502166586422726, "loss": 0.0687, "theoretical_loss": 3.4670049130013765, "tokens_seen": 1944059904 }, { "epoch": 0.59, "learning_rate": 0.0004149414219226448, "loss": 0.0657, "theoretical_loss": 3.466966041005888, "tokens_seen": 1944322048 }, { "epoch": 0.59, "learning_rate": 0.00041486117798106244, "loss": 0.0695, "theoretical_loss": 3.4669271757182143, "tokens_seen": 1944584192 }, { "epoch": 0.59, "learning_rate": 0.00041478093403948005, "loss": 0.0689, "theoretical_loss": 3.466888317136293, "tokens_seen": 1944846336 }, { "epoch": 0.59, "learning_rate": 0.0004147006900978976, "loss": 0.0678, "theoretical_loss": 3.466849465258065, "tokens_seen": 1945108480 }, { "epoch": 0.59, "learning_rate": 0.0004146204461563152, "loss": 0.068, "theoretical_loss": 3.4668106200814695, "tokens_seen": 1945370624 }, { "epoch": 0.59, "learning_rate": 0.00041454020221473283, "loss": 0.0677, "theoretical_loss": 3.466771781604448, "tokens_seen": 1945632768 }, { "epoch": 0.59, "learning_rate": 0.00041445995827315034, "loss": 0.0662, "theoretical_loss": 3.4667329498249426, "tokens_seen": 1945894912 }, { "epoch": 0.59, "learning_rate": 0.00041437971433156795, "loss": 0.0675, "theoretical_loss": 3.466694124740896, "tokens_seen": 1946157056 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.0005765951937064528, "objective/train/docs_used": 709628, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3162634372711182, "objective/train/original_loss": 1.316263198852539, "objective/train/theoretical_loss": 3.466655306350253, "objective/train/tokens_used": 1966879200, "objective/train/value_avg": -0.00644683837890625, "objective/train/value_loss": 0.0001964610710274428, "objective/train/value_max": -6.502866744995117e-05, "objective/train/value_min": -0.73486328125, "objective/train/value_reward_corr": 0.7131132573437041, "objective/train/value_std": 0.01448822021484375, "objective/train/weight_avg": 1.0006669759750366, "objective/train/weighted_lm_loss": 1.3172144889831543, "objective/train/weights_max": 1.8098821640014648, "objective/train/weights_min": 0.3702559173107147, "theoretical_loss": 3.466655306350253, "tokens_seen": 1946419200 }, { "epoch": 0.59, "learning_rate": 0.00041429947038998556, "loss": 0.0682, "theoretical_loss": 3.466655306350253, "tokens_seen": 1946419200 }, { "epoch": 0.59, "learning_rate": 0.0004142192264484032, "loss": 0.0679, "theoretical_loss": 3.4666164946509572, "tokens_seen": 1946681344 }, { "epoch": 0.59, "learning_rate": 0.00041413898250682073, "loss": 0.0697, "theoretical_loss": 3.466577689640955, "tokens_seen": 1946943488 }, { "epoch": 0.59, "learning_rate": 0.00041405873856523834, "loss": 0.0695, "theoretical_loss": 3.4665388913181934, "tokens_seen": 1947205632 }, { "epoch": 0.59, "learning_rate": 0.00041397849462365596, "loss": 0.0666, "theoretical_loss": 3.4665000996806192, "tokens_seen": 1947467776 }, { "epoch": 0.59, "learning_rate": 0.00041389825068207346, "loss": 0.0686, "theoretical_loss": 3.466461314726182, "tokens_seen": 1947729920 }, { "epoch": 0.59, "learning_rate": 0.0004138180067404911, "loss": 0.0672, "theoretical_loss": 3.4664225364528294, "tokens_seen": 1947992064 }, { "epoch": 0.59, "learning_rate": 0.0004137377627989087, "loss": 0.0697, "theoretical_loss": 3.466383764858513, "tokens_seen": 1948254208 }, { "epoch": 0.59, "learning_rate": 0.0004136575188573263, "loss": 0.0689, "theoretical_loss": 3.466344999941184, "tokens_seen": 1948516352 }, { "epoch": 0.59, "learning_rate": 0.00041357727491574386, "loss": 0.0683, "theoretical_loss": 3.4663062416987938, "tokens_seen": 1948778496 }, { "epoch": 0.59, "learning_rate": 0.00041349703097416147, "loss": 0.0697, "theoretical_loss": 3.4662674901292956, "tokens_seen": 1949040640 }, { "epoch": 0.59, "learning_rate": 0.0004134167870325791, "loss": 0.0638, "theoretical_loss": 3.4662287452306435, "tokens_seen": 1949302784 }, { "epoch": 0.59, "learning_rate": 0.00041333654309099664, "loss": 0.0683, "theoretical_loss": 3.466190007000792, "tokens_seen": 1949564928 }, { "epoch": 0.59, "learning_rate": 0.0004132562991494142, "loss": 0.0709, "theoretical_loss": 3.466151275437697, "tokens_seen": 1949827072 }, { "epoch": 0.59, "learning_rate": 0.0004131760552078318, "loss": 0.0659, "theoretical_loss": 3.4661125505393153, "tokens_seen": 1950089216 }, { "epoch": 0.59, "learning_rate": 0.0004130958112662494, "loss": 0.0688, "theoretical_loss": 3.4660738323036036, "tokens_seen": 1950351360 }, { "epoch": 0.59, "learning_rate": 0.000413015567324667, "loss": 0.0705, "theoretical_loss": 3.466035120728521, "tokens_seen": 1950613504 }, { "epoch": 0.59, "learning_rate": 0.0004129353233830846, "loss": 0.0669, "theoretical_loss": 3.465996415812027, "tokens_seen": 1950875648 }, { "epoch": 0.59, "learning_rate": 0.0004128550794415022, "loss": 0.0699, "theoretical_loss": 3.4659577175520813, "tokens_seen": 1951137792 }, { "epoch": 0.59, "learning_rate": 0.00041277483549991977, "loss": 0.0668, "theoretical_loss": 3.4659190259466444, "tokens_seen": 1951399936 }, { "epoch": 0.59, "learning_rate": 0.0004126945915583373, "loss": 0.0673, "theoretical_loss": 3.46588034099368, "tokens_seen": 1951662080 }, { "epoch": 0.59, "learning_rate": 0.00041261434761675494, "loss": 0.0689, "theoretical_loss": 3.465841662691149, "tokens_seen": 1951924224 }, { "epoch": 0.59, "learning_rate": 0.0004125341036751725, "loss": 0.0672, "theoretical_loss": 3.465802991037016, "tokens_seen": 1952186368 }, { "epoch": 0.59, "learning_rate": 0.0004124538597335901, "loss": 0.0702, "theoretical_loss": 3.4657643260292463, "tokens_seen": 1952448512 }, { "epoch": 0.59, "learning_rate": 0.0004123736157920077, "loss": 0.0689, "theoretical_loss": 3.4657256676658053, "tokens_seen": 1952710656 }, { "epoch": 0.59, "objective/train/advantage_avg": -0.0003757055092137307, "objective/train/docs_used": 712021, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3009216785430908, "objective/train/original_loss": 1.3009214401245117, "objective/train/theoretical_loss": 3.4656870159446584, "objective/train/tokens_used": 1973432800, "objective/train/value_avg": -0.01009368896484375, "objective/train/value_loss": 0.0003471270902082324, "objective/train/value_max": -5.608797073364258e-05, "objective/train/value_min": -0.74755859375, "objective/train/value_reward_corr": 0.8502257457911682, "objective/train/value_std": 0.0214691162109375, "objective/train/weight_avg": 0.9997867345809937, "objective/train/weighted_lm_loss": 1.2977733612060547, "objective/train/weights_max": 1.6069636344909668, "objective/train/weights_min": 0.40155351161956787, "theoretical_loss": 3.4656870159446584, "tokens_seen": 1952972800 }, { "epoch": 0.59, "learning_rate": 0.00041229337185042533, "loss": 0.0688, "theoretical_loss": 3.4656870159446584, "tokens_seen": 1952972800 }, { "epoch": 0.59, "learning_rate": 0.0004122131279088429, "loss": 0.0676, "theoretical_loss": 3.465648370863774, "tokens_seen": 1953234944 }, { "epoch": 0.59, "learning_rate": 0.0004121328839672605, "loss": 0.0664, "theoretical_loss": 3.46560973242112, "tokens_seen": 1953497088 }, { "epoch": 0.59, "learning_rate": 0.00041205264002567806, "loss": 0.0695, "theoretical_loss": 3.4655711006146657, "tokens_seen": 1953759232 }, { "epoch": 0.59, "learning_rate": 0.0004119723960840956, "loss": 0.0701, "theoretical_loss": 3.465532475442381, "tokens_seen": 1954021376 }, { "epoch": 0.59, "learning_rate": 0.00041189215214251323, "loss": 0.0685, "theoretical_loss": 3.4654938569022375, "tokens_seen": 1954283520 }, { "epoch": 0.59, "learning_rate": 0.00041181190820093085, "loss": 0.0684, "theoretical_loss": 3.4654552449922056, "tokens_seen": 1954545664 }, { "epoch": 0.59, "learning_rate": 0.00041173166425934846, "loss": 0.0698, "theoretical_loss": 3.4654166397102593, "tokens_seen": 1954807808 }, { "epoch": 0.59, "learning_rate": 0.000411651420317766, "loss": 0.0693, "theoretical_loss": 3.4653780410543717, "tokens_seen": 1955069952 }, { "epoch": 0.59, "learning_rate": 0.00041157117637618363, "loss": 0.0687, "theoretical_loss": 3.465339449022517, "tokens_seen": 1955332096 }, { "epoch": 0.59, "learning_rate": 0.0004114909324346012, "loss": 0.0702, "theoretical_loss": 3.4653008636126716, "tokens_seen": 1955594240 }, { "epoch": 0.59, "learning_rate": 0.00041141068849301875, "loss": 0.0684, "theoretical_loss": 3.4652622848228107, "tokens_seen": 1955856384 }, { "epoch": 0.59, "learning_rate": 0.00041133044455143636, "loss": 0.069, "theoretical_loss": 3.4652237126509124, "tokens_seen": 1956118528 }, { "epoch": 0.59, "learning_rate": 0.00041125020060985397, "loss": 0.0693, "theoretical_loss": 3.465185147094954, "tokens_seen": 1956380672 }, { "epoch": 0.59, "learning_rate": 0.0004111699566682716, "loss": 0.0695, "theoretical_loss": 3.465146588152915, "tokens_seen": 1956642816 }, { "epoch": 0.59, "learning_rate": 0.00041108971272668914, "loss": 0.0681, "theoretical_loss": 3.465108035822775, "tokens_seen": 1956904960 }, { "epoch": 0.59, "learning_rate": 0.00041100946878510676, "loss": 0.0702, "theoretical_loss": 3.4650694901025147, "tokens_seen": 1957167104 }, { "epoch": 0.59, "learning_rate": 0.0004109292248435243, "loss": 0.0669, "theoretical_loss": 3.4650309509901156, "tokens_seen": 1957429248 }, { "epoch": 0.59, "learning_rate": 0.0004108489809019419, "loss": 0.0722, "theoretical_loss": 3.4649924184835603, "tokens_seen": 1957691392 }, { "epoch": 0.59, "learning_rate": 0.0004107687369603595, "loss": 0.0703, "theoretical_loss": 3.4649538925808328, "tokens_seen": 1957953536 }, { "epoch": 0.59, "learning_rate": 0.0004106884930187771, "loss": 0.0689, "theoretical_loss": 3.4649153732799167, "tokens_seen": 1958215680 }, { "epoch": 0.59, "learning_rate": 0.0004106082490771947, "loss": 0.0673, "theoretical_loss": 3.464876860578797, "tokens_seen": 1958477824 }, { "epoch": 0.59, "learning_rate": 0.00041052800513561227, "loss": 0.0702, "theoretical_loss": 3.46483835447546, "tokens_seen": 1958739968 }, { "epoch": 0.59, "learning_rate": 0.0004104477611940299, "loss": 0.0675, "theoretical_loss": 3.464799854967893, "tokens_seen": 1959002112 }, { "epoch": 0.59, "learning_rate": 0.0004103675172524475, "loss": 0.0697, "theoretical_loss": 3.464761362054084, "tokens_seen": 1959264256 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.00013065687380731106, "objective/train/docs_used": 714425, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4167454242706299, "objective/train/original_loss": 1.4167454242706299, "objective/train/theoretical_loss": 3.4647228757320203, "objective/train/tokens_used": 1979986400, "objective/train/value_avg": -0.01082611083984375, "objective/train/value_loss": 0.0005280160694383085, "objective/train/value_max": -7.033348083496094e-05, "objective/train/value_min": -0.958984375, "objective/train/value_reward_corr": 0.7742440701143263, "objective/train/value_std": 0.0279388427734375, "objective/train/weight_avg": 1.0003762245178223, "objective/train/weighted_lm_loss": 1.4171031713485718, "objective/train/weights_max": 2.2862038612365723, "objective/train/weights_min": 0.36873859167099, "theoretical_loss": 3.4647228757320203, "tokens_seen": 1959526400 }, { "epoch": 0.59, "learning_rate": 0.000410287273310865, "loss": 0.0681, "theoretical_loss": 3.4647228757320203, "tokens_seen": 1959526400 }, { "epoch": 0.59, "learning_rate": 0.0004102070293692826, "loss": 0.0697, "theoretical_loss": 3.4646843959996927, "tokens_seen": 1959788544 }, { "epoch": 0.59, "learning_rate": 0.0004101267854277002, "loss": 0.0676, "theoretical_loss": 3.464645922855091, "tokens_seen": 1960050688 }, { "epoch": 0.59, "learning_rate": 0.0004100465414861178, "loss": 0.07, "theoretical_loss": 3.464607456296207, "tokens_seen": 1960312832 }, { "epoch": 0.59, "learning_rate": 0.0004099662975445354, "loss": 0.0708, "theoretical_loss": 3.464568996321033, "tokens_seen": 1960574976 }, { "epoch": 0.59, "learning_rate": 0.000409886053602953, "loss": 0.0704, "theoretical_loss": 3.4645305429275624, "tokens_seen": 1960837120 }, { "epoch": 0.59, "learning_rate": 0.0004098058096613706, "loss": 0.0695, "theoretical_loss": 3.464492096113788, "tokens_seen": 1961099264 }, { "epoch": 0.59, "learning_rate": 0.0004097255657197881, "loss": 0.0694, "theoretical_loss": 3.4644536558777057, "tokens_seen": 1961361408 }, { "epoch": 0.59, "learning_rate": 0.00040964532177820574, "loss": 0.07, "theoretical_loss": 3.4644152222173106, "tokens_seen": 1961623552 }, { "epoch": 0.59, "learning_rate": 0.00040956507783662335, "loss": 0.0688, "theoretical_loss": 3.4643767951305997, "tokens_seen": 1961885696 }, { "epoch": 0.59, "learning_rate": 0.0004094848338950409, "loss": 0.0666, "theoretical_loss": 3.4643383746155703, "tokens_seen": 1962147840 }, { "epoch": 0.59, "learning_rate": 0.0004094045899534585, "loss": 0.0677, "theoretical_loss": 3.464299960670221, "tokens_seen": 1962409984 }, { "epoch": 0.59, "learning_rate": 0.00040932434601187613, "loss": 0.0693, "theoretical_loss": 3.464261553292551, "tokens_seen": 1962672128 }, { "epoch": 0.59, "learning_rate": 0.00040924410207029375, "loss": 0.0697, "theoretical_loss": 3.4642231524805607, "tokens_seen": 1962934272 }, { "epoch": 0.59, "learning_rate": 0.00040916385812871125, "loss": 0.0703, "theoretical_loss": 3.4641847582322507, "tokens_seen": 1963196416 }, { "epoch": 0.6, "learning_rate": 0.00040908361418712886, "loss": 0.0711, "theoretical_loss": 3.4641463705456226, "tokens_seen": 1963458560 }, { "epoch": 0.6, "learning_rate": 0.0004090033702455465, "loss": 0.0704, "theoretical_loss": 3.4641079894186797, "tokens_seen": 1963720704 }, { "epoch": 0.6, "learning_rate": 0.00040892312630396403, "loss": 0.0674, "theoretical_loss": 3.4640696148494254, "tokens_seen": 1963982848 }, { "epoch": 0.6, "learning_rate": 0.00040884288236238165, "loss": 0.068, "theoretical_loss": 3.464031246835864, "tokens_seen": 1964244992 }, { "epoch": 0.6, "learning_rate": 0.00040876263842079926, "loss": 0.0684, "theoretical_loss": 3.4639928853760016, "tokens_seen": 1964507136 }, { "epoch": 0.6, "learning_rate": 0.00040868239447921687, "loss": 0.0688, "theoretical_loss": 3.463954530467844, "tokens_seen": 1964769280 }, { "epoch": 0.6, "learning_rate": 0.00040860215053763443, "loss": 0.0672, "theoretical_loss": 3.463916182109398, "tokens_seen": 1965031424 }, { "epoch": 0.6, "learning_rate": 0.000408521906596052, "loss": 0.0686, "theoretical_loss": 3.463877840298672, "tokens_seen": 1965293568 }, { "epoch": 0.6, "learning_rate": 0.0004084416626544696, "loss": 0.0683, "theoretical_loss": 3.4638395050336745, "tokens_seen": 1965555712 }, { "epoch": 0.6, "learning_rate": 0.00040836141871288716, "loss": 0.0696, "theoretical_loss": 3.4638011763124164, "tokens_seen": 1965817856 }, { "debugging/Compilability": 1.0, "debugging/distinct-1-grams": 0.7414976990322337, "debugging/entropy-1-grams": 5.624956263227352, "debugging/length": 461.2173913043478, "debugging/num_segments": 23, "debugging/raw_token_scores_avg": 0.006980398204177618, "debugging/raw_token_scores_std": 0.02224303036928177, "debugging/score": 0.007207965953219283, "debugging/score_std": 0.01109644612420906, "epoch": 0.6, "objective/train/advantage_avg": 0.00012892790255136788, "objective/train/docs_used": 716760, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4562304019927979, "objective/train/original_loss": 1.456230640411377, "objective/train/theoretical_loss": 3.4637628541329066, "objective/train/tokens_used": 1986540000, "objective/train/value_avg": -0.007106781005859375, "objective/train/value_loss": 0.00028058464522473514, "objective/train/value_max": -5.269050598144531e-05, "objective/train/value_min": -0.841796875, "objective/train/value_reward_corr": 0.6591656050447988, "objective/train/value_std": 0.01369476318359375, "objective/train/weight_avg": 1.0002350807189941, "objective/train/weighted_lm_loss": 1.456529974937439, "objective/train/weights_max": 2.0581281185150146, "objective/train/weights_min": 0.06374084949493408, "theoretical_loss": 3.4637628541329066, "tokens_seen": 1966080000 }, { "epoch": 0.6, "learning_rate": 0.00040828117477130477, "loss": 0.0681, "theoretical_loss": 3.4637628541329066, "tokens_seen": 1966080000 }, { "epoch": 0.6, "learning_rate": 0.0004082009308297224, "loss": 0.0666, "theoretical_loss": 3.4637245384931576, "tokens_seen": 1966342144 }, { "epoch": 0.6, "learning_rate": 0.00040812068688813994, "loss": 0.0694, "theoretical_loss": 3.4636862293911816, "tokens_seen": 1966604288 }, { "epoch": 0.6, "learning_rate": 0.00040804044294655756, "loss": 0.0677, "theoretical_loss": 3.463647926824992, "tokens_seen": 1966866432 }, { "epoch": 0.6, "learning_rate": 0.0004079601990049751, "loss": 0.0693, "theoretical_loss": 3.4636096307926016, "tokens_seen": 1967128576 }, { "epoch": 0.6, "learning_rate": 0.0004078799550633927, "loss": 0.0695, "theoretical_loss": 3.4635713412920275, "tokens_seen": 1967390720 }, { "epoch": 0.6, "learning_rate": 0.0004077997111218103, "loss": 0.0704, "theoretical_loss": 3.4635330583212838, "tokens_seen": 1967652864 }, { "epoch": 0.6, "learning_rate": 0.0004077194671802279, "loss": 0.0694, "theoretical_loss": 3.463494781878388, "tokens_seen": 1967915008 }, { "epoch": 0.6, "learning_rate": 0.0004076392232386455, "loss": 0.0715, "theoretical_loss": 3.4634565119613576, "tokens_seen": 1968177152 }, { "epoch": 0.6, "learning_rate": 0.00040755897929706307, "loss": 0.0699, "theoretical_loss": 3.463418248568211, "tokens_seen": 1968439296 }, { "epoch": 0.6, "learning_rate": 0.0004074787353554807, "loss": 0.0673, "theoretical_loss": 3.463379991696967, "tokens_seen": 1968701440 }, { "epoch": 0.6, "learning_rate": 0.0004073984914138983, "loss": 0.0681, "theoretical_loss": 3.463341741345646, "tokens_seen": 1968963584 }, { "epoch": 0.6, "learning_rate": 0.00040731824747231585, "loss": 0.0663, "theoretical_loss": 3.4633034975122694, "tokens_seen": 1969225728 }, { "epoch": 0.6, "learning_rate": 0.0004072380035307334, "loss": 0.0693, "theoretical_loss": 3.4632652601948593, "tokens_seen": 1969487872 }, { "epoch": 0.6, "learning_rate": 0.000407157759589151, "loss": 0.0685, "theoretical_loss": 3.463227029391437, "tokens_seen": 1969750016 }, { "epoch": 0.6, "learning_rate": 0.00040707751564756864, "loss": 0.0695, "theoretical_loss": 3.4631888051000272, "tokens_seen": 1970012160 }, { "epoch": 0.6, "learning_rate": 0.0004069972717059862, "loss": 0.0669, "theoretical_loss": 3.4631505873186548, "tokens_seen": 1970274304 }, { "epoch": 0.6, "learning_rate": 0.0004069170277644038, "loss": 0.067, "theoretical_loss": 3.463112376045344, "tokens_seen": 1970536448 }, { "epoch": 0.6, "learning_rate": 0.0004068367838228214, "loss": 0.0679, "theoretical_loss": 3.463074171278122, "tokens_seen": 1970798592 }, { "epoch": 0.6, "learning_rate": 0.000406756539881239, "loss": 0.0679, "theoretical_loss": 3.4630359730150153, "tokens_seen": 1971060736 }, { "epoch": 0.6, "learning_rate": 0.00040667629593965654, "loss": 0.0694, "theoretical_loss": 3.4629977812540518, "tokens_seen": 1971322880 }, { "epoch": 0.6, "learning_rate": 0.00040659605199807415, "loss": 0.0709, "theoretical_loss": 3.46295959599326, "tokens_seen": 1971585024 }, { "epoch": 0.6, "learning_rate": 0.00040651580805649176, "loss": 0.0695, "theoretical_loss": 3.4629214172306706, "tokens_seen": 1971847168 }, { "epoch": 0.6, "learning_rate": 0.0004064355641149093, "loss": 0.0704, "theoretical_loss": 3.462883244964313, "tokens_seen": 1972109312 }, { "epoch": 0.6, "learning_rate": 0.00040635532017332693, "loss": 0.0663, "theoretical_loss": 3.4628450791922187, "tokens_seen": 1972371456 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.00040722067933529615, "objective/train/docs_used": 719209, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3216396570205688, "objective/train/original_loss": 1.3216397762298584, "objective/train/theoretical_loss": 3.4628069199124205, "objective/train/tokens_used": 1993093600, "objective/train/value_avg": -0.00849151611328125, "objective/train/value_loss": 0.00020072172628715634, "objective/train/value_max": -6.657838821411133e-05, "objective/train/value_min": -0.3955078125, "objective/train/value_reward_corr": 0.7492742374040502, "objective/train/value_std": 0.01561737060546875, "objective/train/weight_avg": 1.000498652458191, "objective/train/weighted_lm_loss": 1.3224918842315674, "objective/train/weights_max": 1.2429403066635132, "objective/train/weights_min": 0.36926716566085815, "theoretical_loss": 3.4628069199124205, "tokens_seen": 1972633600 }, { "epoch": 0.6, "learning_rate": 0.00040627507623174455, "loss": 0.066, "theoretical_loss": 3.4628069199124205, "tokens_seen": 1972633600 }, { "epoch": 0.6, "learning_rate": 0.00040619483229016205, "loss": 0.0675, "theoretical_loss": 3.4627687671229515, "tokens_seen": 1972895744 }, { "epoch": 0.6, "learning_rate": 0.00040611458834857966, "loss": 0.0687, "theoretical_loss": 3.462730620821845, "tokens_seen": 1973157888 }, { "epoch": 0.6, "learning_rate": 0.0004060343444069973, "loss": 0.0695, "theoretical_loss": 3.462692481007136, "tokens_seen": 1973420032 }, { "epoch": 0.6, "learning_rate": 0.0004059541004654149, "loss": 0.0678, "theoretical_loss": 3.46265434767686, "tokens_seen": 1973682176 }, { "epoch": 0.6, "learning_rate": 0.00040587385652383245, "loss": 0.068, "theoretical_loss": 3.4626162208290534, "tokens_seen": 1973944320 }, { "epoch": 0.6, "learning_rate": 0.00040579361258225006, "loss": 0.0691, "theoretical_loss": 3.462578100461754, "tokens_seen": 1974206464 }, { "epoch": 0.6, "learning_rate": 0.00040571336864066767, "loss": 0.069, "theoretical_loss": 3.4625399865730007, "tokens_seen": 1974468608 }, { "epoch": 0.6, "learning_rate": 0.00040563312469908523, "loss": 0.067, "theoretical_loss": 3.462501879160831, "tokens_seen": 1974730752 }, { "epoch": 0.6, "learning_rate": 0.0004055528807575028, "loss": 0.0702, "theoretical_loss": 3.462463778223285, "tokens_seen": 1974992896 }, { "epoch": 0.6, "learning_rate": 0.0004054726368159204, "loss": 0.0682, "theoretical_loss": 3.462425683758404, "tokens_seen": 1975255040 }, { "epoch": 0.6, "learning_rate": 0.000405392392874338, "loss": 0.0721, "theoretical_loss": 3.4623875957642305, "tokens_seen": 1975517184 }, { "epoch": 0.6, "learning_rate": 0.00040531214893275557, "loss": 0.0698, "theoretical_loss": 3.462349514238805, "tokens_seen": 1975779328 }, { "epoch": 0.6, "learning_rate": 0.0004052319049911732, "loss": 0.0705, "theoretical_loss": 3.462311439180173, "tokens_seen": 1976041472 }, { "epoch": 0.6, "learning_rate": 0.0004051516610495908, "loss": 0.0677, "theoretical_loss": 3.4622733705863764, "tokens_seen": 1976303616 }, { "epoch": 0.6, "learning_rate": 0.00040507141710800835, "loss": 0.0654, "theoretical_loss": 3.462235308455462, "tokens_seen": 1976565760 }, { "epoch": 0.6, "learning_rate": 0.0004049911731664259, "loss": 0.0687, "theoretical_loss": 3.4621972527854745, "tokens_seen": 1976827904 }, { "epoch": 0.6, "learning_rate": 0.0004049109292248435, "loss": 0.0705, "theoretical_loss": 3.462159203574461, "tokens_seen": 1977090048 }, { "epoch": 0.6, "learning_rate": 0.00040483068528326114, "loss": 0.0686, "theoretical_loss": 3.46212116082047, "tokens_seen": 1977352192 }, { "epoch": 0.6, "learning_rate": 0.0004047504413416787, "loss": 0.0699, "theoretical_loss": 3.4620831245215484, "tokens_seen": 1977614336 }, { "epoch": 0.6, "learning_rate": 0.0004046701974000963, "loss": 0.0692, "theoretical_loss": 3.462045094675747, "tokens_seen": 1977876480 }, { "epoch": 0.6, "learning_rate": 0.0004045899534585139, "loss": 0.0683, "theoretical_loss": 3.462007071281114, "tokens_seen": 1978138624 }, { "epoch": 0.6, "learning_rate": 0.0004045097095169315, "loss": 0.0666, "theoretical_loss": 3.461969054335703, "tokens_seen": 1978400768 }, { "epoch": 0.6, "learning_rate": 0.00040442946557534904, "loss": 0.0687, "theoretical_loss": 3.461931043837563, "tokens_seen": 1978662912 }, { "epoch": 0.6, "learning_rate": 0.00040434922163376665, "loss": 0.0681, "theoretical_loss": 3.4618930397847487, "tokens_seen": 1978925056 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.0015131087275221944, "objective/train/docs_used": 721537, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4136056900024414, "objective/train/original_loss": 1.4136054515838623, "objective/train/theoretical_loss": 3.461855042175312, "objective/train/tokens_used": 1999647200, "objective/train/value_avg": -0.007205963134765625, "objective/train/value_loss": 0.00017736262816470116, "objective/train/value_max": -6.204843521118164e-05, "objective/train/value_min": -0.340087890625, "objective/train/value_reward_corr": 0.6555204938605945, "objective/train/value_std": 0.0123291015625, "objective/train/weight_avg": 1.001592993736267, "objective/train/weighted_lm_loss": 1.4155278205871582, "objective/train/weights_max": 1.4050710201263428, "objective/train/weights_min": 0.3719121813774109, "theoretical_loss": 3.461855042175312, "tokens_seen": 1979187200 }, { "epoch": 0.6, "learning_rate": 0.0004042689776921842, "loss": 0.0679, "theoretical_loss": 3.461855042175312, "tokens_seen": 1979187200 }, { "epoch": 0.6, "learning_rate": 0.0004041887337506018, "loss": 0.0687, "theoretical_loss": 3.4618170510073085, "tokens_seen": 1979449344 }, { "epoch": 0.6, "learning_rate": 0.00040410848980901944, "loss": 0.0696, "theoretical_loss": 3.4617790662787935, "tokens_seen": 1979711488 }, { "epoch": 0.6, "learning_rate": 0.00040402824586743705, "loss": 0.0724, "theoretical_loss": 3.4617410879878223, "tokens_seen": 1979973632 }, { "epoch": 0.6, "learning_rate": 0.0004039480019258546, "loss": 0.0693, "theoretical_loss": 3.461703116132452, "tokens_seen": 1980235776 }, { "epoch": 0.6, "learning_rate": 0.0004038677579842722, "loss": 0.0695, "theoretical_loss": 3.46166515071074, "tokens_seen": 1980497920 }, { "epoch": 0.6, "learning_rate": 0.0004037875140426898, "loss": 0.0684, "theoretical_loss": 3.461627191720745, "tokens_seen": 1980760064 }, { "epoch": 0.6, "learning_rate": 0.00040370727010110734, "loss": 0.0695, "theoretical_loss": 3.461589239160528, "tokens_seen": 1981022208 }, { "epoch": 0.6, "learning_rate": 0.00040362702615952495, "loss": 0.0682, "theoretical_loss": 3.4615512930281467, "tokens_seen": 1981284352 }, { "epoch": 0.6, "learning_rate": 0.00040354678221794256, "loss": 0.0676, "theoretical_loss": 3.4615133533216635, "tokens_seen": 1981546496 }, { "epoch": 0.6, "learning_rate": 0.0004034665382763602, "loss": 0.068, "theoretical_loss": 3.4614754200391404, "tokens_seen": 1981808640 }, { "epoch": 0.6, "learning_rate": 0.00040338629433477773, "loss": 0.0658, "theoretical_loss": 3.4614374931786402, "tokens_seen": 1982070784 }, { "epoch": 0.6, "learning_rate": 0.00040330605039319534, "loss": 0.0669, "theoretical_loss": 3.461399572738226, "tokens_seen": 1982332928 }, { "epoch": 0.6, "learning_rate": 0.0004032258064516129, "loss": 0.0687, "theoretical_loss": 3.461361658715963, "tokens_seen": 1982595072 }, { "epoch": 0.6, "learning_rate": 0.00040314556251003046, "loss": 0.0695, "theoretical_loss": 3.4613237511099157, "tokens_seen": 1982857216 }, { "epoch": 0.6, "learning_rate": 0.0004030653185684481, "loss": 0.0684, "theoretical_loss": 3.4612858499181502, "tokens_seen": 1983119360 }, { "epoch": 0.6, "learning_rate": 0.0004029850746268657, "loss": 0.07, "theoretical_loss": 3.4612479551387345, "tokens_seen": 1983381504 }, { "epoch": 0.6, "learning_rate": 0.0004029048306852833, "loss": 0.0689, "theoretical_loss": 3.461210066769736, "tokens_seen": 1983643648 }, { "epoch": 0.6, "learning_rate": 0.00040282458674370086, "loss": 0.0688, "theoretical_loss": 3.4611721848092225, "tokens_seen": 1983905792 }, { "epoch": 0.6, "learning_rate": 0.00040274434280211847, "loss": 0.0692, "theoretical_loss": 3.461134309255265, "tokens_seen": 1984167936 }, { "epoch": 0.6, "learning_rate": 0.0004026640988605361, "loss": 0.068, "theoretical_loss": 3.4610964401059325, "tokens_seen": 1984430080 }, { "epoch": 0.6, "learning_rate": 0.0004025838549189536, "loss": 0.0691, "theoretical_loss": 3.461058577359297, "tokens_seen": 1984692224 }, { "epoch": 0.6, "learning_rate": 0.0004025036109773712, "loss": 0.0684, "theoretical_loss": 3.4610207210134294, "tokens_seen": 1984954368 }, { "epoch": 0.6, "learning_rate": 0.0004024233670357888, "loss": 0.0702, "theoretical_loss": 3.4609828710664035, "tokens_seen": 1985216512 }, { "epoch": 0.6, "learning_rate": 0.0004023431230942064, "loss": 0.0673, "theoretical_loss": 3.460945027516293, "tokens_seen": 1985478656 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.0006132093258202076, "objective/train/docs_used": 723995, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3630348443984985, "objective/train/original_loss": 1.3630346059799194, "objective/train/theoretical_loss": 3.460907190361172, "objective/train/tokens_used": 2006200800, "objective/train/value_avg": -0.00844573974609375, "objective/train/value_loss": 0.00041758434963412583, "objective/train/value_max": -5.143880844116211e-05, "objective/train/value_min": -0.984375, "objective/train/value_reward_corr": 0.6131844355731204, "objective/train/value_std": 0.01934814453125, "objective/train/weight_avg": 1.0008108615875244, "objective/train/weighted_lm_loss": 1.3635249137878418, "objective/train/weights_max": 2.572547435760498, "objective/train/weights_min": 0.3744775056838989, "theoretical_loss": 3.460907190361172, "tokens_seen": 1985740800 }, { "epoch": 0.6, "learning_rate": 0.000402262879152624, "loss": 0.0675, "theoretical_loss": 3.460907190361172, "tokens_seen": 1985740800 }, { "epoch": 0.6, "learning_rate": 0.0004021826352110416, "loss": 0.0679, "theoretical_loss": 3.460869359599116, "tokens_seen": 1986002944 }, { "epoch": 0.6, "learning_rate": 0.0004021023912694592, "loss": 0.0682, "theoretical_loss": 3.460831535228201, "tokens_seen": 1986265088 }, { "epoch": 0.6, "learning_rate": 0.0004020221473278767, "loss": 0.0695, "theoretical_loss": 3.4607937172465046, "tokens_seen": 1986527232 }, { "epoch": 0.6, "learning_rate": 0.0004019419033862943, "loss": 0.0671, "theoretical_loss": 3.4607559056521033, "tokens_seen": 1986789376 }, { "epoch": 0.6, "learning_rate": 0.00040186165944471194, "loss": 0.069, "theoretical_loss": 3.4607181004430774, "tokens_seen": 1987051520 }, { "epoch": 0.6, "learning_rate": 0.0004017814155031295, "loss": 0.0683, "theoretical_loss": 3.460680301617505, "tokens_seen": 1987313664 }, { "epoch": 0.6, "learning_rate": 0.0004017011715615471, "loss": 0.069, "theoretical_loss": 3.460642509173468, "tokens_seen": 1987575808 }, { "epoch": 0.6, "learning_rate": 0.0004016209276199647, "loss": 0.0719, "theoretical_loss": 3.4606047231090455, "tokens_seen": 1987837952 }, { "epoch": 0.6, "learning_rate": 0.00040154068367838233, "loss": 0.0658, "theoretical_loss": 3.4605669434223216, "tokens_seen": 1988100096 }, { "epoch": 0.6, "learning_rate": 0.00040146043973679984, "loss": 0.0671, "theoretical_loss": 3.4605291701113776, "tokens_seen": 1988362240 }, { "epoch": 0.6, "learning_rate": 0.00040138019579521745, "loss": 0.0663, "theoretical_loss": 3.4604914031742977, "tokens_seen": 1988624384 }, { "epoch": 0.6, "learning_rate": 0.00040129995185363506, "loss": 0.0673, "theoretical_loss": 3.460453642609166, "tokens_seen": 1988886528 }, { "epoch": 0.6, "learning_rate": 0.0004012197079120526, "loss": 0.0697, "theoretical_loss": 3.4604158884140683, "tokens_seen": 1989148672 }, { "epoch": 0.6, "learning_rate": 0.00040113946397047023, "loss": 0.0686, "theoretical_loss": 3.460378140587091, "tokens_seen": 1989410816 }, { "epoch": 0.6, "learning_rate": 0.00040105922002888785, "loss": 0.0693, "theoretical_loss": 3.46034039912632, "tokens_seen": 1989672960 }, { "epoch": 0.6, "learning_rate": 0.00040097897608730546, "loss": 0.0662, "theoretical_loss": 3.460302664029844, "tokens_seen": 1989935104 }, { "epoch": 0.6, "learning_rate": 0.000400898732145723, "loss": 0.0708, "theoretical_loss": 3.4602649352957515, "tokens_seen": 1990197248 }, { "epoch": 0.6, "learning_rate": 0.0004008184882041406, "loss": 0.0675, "theoretical_loss": 3.460227212922131, "tokens_seen": 1990459392 }, { "epoch": 0.6, "learning_rate": 0.0004007382442625582, "loss": 0.0693, "theoretical_loss": 3.4601894969070743, "tokens_seen": 1990721536 }, { "epoch": 0.6, "learning_rate": 0.00040065800032097575, "loss": 0.067, "theoretical_loss": 3.460151787248672, "tokens_seen": 1990983680 }, { "epoch": 0.6, "learning_rate": 0.00040057775637939336, "loss": 0.0682, "theoretical_loss": 3.460114083945015, "tokens_seen": 1991245824 }, { "epoch": 0.6, "learning_rate": 0.00040049751243781097, "loss": 0.0689, "theoretical_loss": 3.4600763869941966, "tokens_seen": 1991507968 }, { "epoch": 0.6, "learning_rate": 0.0004004172684962286, "loss": 0.0687, "theoretical_loss": 3.460038696394311, "tokens_seen": 1991770112 }, { "epoch": 0.6, "learning_rate": 0.00040033702455464614, "loss": 0.0706, "theoretical_loss": 3.4600010121434517, "tokens_seen": 1992032256 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.0002454657224006951, "objective/train/docs_used": 726441, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3276835680007935, "objective/train/original_loss": 1.327683448791504, "objective/train/theoretical_loss": 3.4599633342397142, "objective/train/tokens_used": 2012754400, "objective/train/value_avg": -0.0082244873046875, "objective/train/value_loss": 0.00023926161520648748, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.59423828125, "objective/train/value_reward_corr": 0.7205625276168954, "objective/train/value_std": 0.0155487060546875, "objective/train/weight_avg": 1.000353455543518, "objective/train/weighted_lm_loss": 1.3281760215759277, "objective/train/weights_max": 1.299572229385376, "objective/train/weights_min": 0.36829331517219543, "theoretical_loss": 3.4599633342397142, "tokens_seen": 1992294400 }, { "epoch": 0.6, "learning_rate": 0.0004002567806130637, "loss": 0.068, "theoretical_loss": 3.4599633342397142, "tokens_seen": 1992294400 }, { "epoch": 0.6, "learning_rate": 0.0004001765366714813, "loss": 0.0682, "theoretical_loss": 3.4599256626811945, "tokens_seen": 1992556544 }, { "epoch": 0.6, "learning_rate": 0.0004000962927298989, "loss": 0.0682, "theoretical_loss": 3.45988799746599, "tokens_seen": 1992818688 }, { "epoch": 0.6, "learning_rate": 0.0004000160487883165, "loss": 0.0677, "theoretical_loss": 3.4598503385921977, "tokens_seen": 1993080832 }, { "epoch": 0.6, "learning_rate": 0.0003999358048467341, "loss": 0.0677, "theoretical_loss": 3.4598126860579166, "tokens_seen": 1993342976 }, { "epoch": 0.6, "learning_rate": 0.00039985556090515166, "loss": 0.0673, "theoretical_loss": 3.4597750398612455, "tokens_seen": 1993605120 }, { "epoch": 0.6, "learning_rate": 0.00039977531696356927, "loss": 0.0702, "theoretical_loss": 3.459737400000284, "tokens_seen": 1993867264 }, { "epoch": 0.6, "learning_rate": 0.00039969507302198683, "loss": 0.0675, "theoretical_loss": 3.4596997664731344, "tokens_seen": 1994129408 }, { "epoch": 0.6, "learning_rate": 0.00039961482908040444, "loss": 0.0685, "theoretical_loss": 3.4596621392778983, "tokens_seen": 1994391552 }, { "epoch": 0.6, "learning_rate": 0.000399534585138822, "loss": 0.0685, "theoretical_loss": 3.459624518412677, "tokens_seen": 1994653696 }, { "epoch": 0.6, "learning_rate": 0.0003994543411972396, "loss": 0.0668, "theoretical_loss": 3.459586903875575, "tokens_seen": 1994915840 }, { "epoch": 0.6, "learning_rate": 0.0003993740972556572, "loss": 0.0692, "theoretical_loss": 3.4595492956646963, "tokens_seen": 1995177984 }, { "epoch": 0.6, "learning_rate": 0.0003992938533140748, "loss": 0.0675, "theoretical_loss": 3.459511693778146, "tokens_seen": 1995440128 }, { "epoch": 0.6, "learning_rate": 0.0003992136093724924, "loss": 0.0679, "theoretical_loss": 3.4594740982140295, "tokens_seen": 1995702272 }, { "epoch": 0.6, "learning_rate": 0.00039913336543091, "loss": 0.0699, "theoretical_loss": 3.459436508970454, "tokens_seen": 1995964416 }, { "epoch": 0.6, "learning_rate": 0.00039905312148932757, "loss": 0.0689, "theoretical_loss": 3.4593989260455267, "tokens_seen": 1996226560 }, { "epoch": 0.61, "learning_rate": 0.0003989728775477451, "loss": 0.0677, "theoretical_loss": 3.459361349437356, "tokens_seen": 1996488704 }, { "epoch": 0.61, "learning_rate": 0.00039889263360616274, "loss": 0.0695, "theoretical_loss": 3.459323779144051, "tokens_seen": 1996750848 }, { "epoch": 0.61, "learning_rate": 0.00039881238966458035, "loss": 0.0677, "theoretical_loss": 3.4592862151637216, "tokens_seen": 1997012992 }, { "epoch": 0.61, "learning_rate": 0.0003987321457229979, "loss": 0.0673, "theoretical_loss": 3.4592486574944785, "tokens_seen": 1997275136 }, { "epoch": 0.61, "learning_rate": 0.0003986519017814155, "loss": 0.0693, "theoretical_loss": 3.4592111061344335, "tokens_seen": 1997537280 }, { "epoch": 0.61, "learning_rate": 0.00039857165783983313, "loss": 0.07, "theoretical_loss": 3.4591735610816983, "tokens_seen": 1997799424 }, { "epoch": 0.61, "learning_rate": 0.0003984914138982507, "loss": 0.0689, "theoretical_loss": 3.459136022334387, "tokens_seen": 1998061568 }, { "epoch": 0.61, "learning_rate": 0.00039841116995666825, "loss": 0.0715, "theoretical_loss": 3.4590984898906134, "tokens_seen": 1998323712 }, { "epoch": 0.61, "learning_rate": 0.00039833092601508586, "loss": 0.0672, "theoretical_loss": 3.4590609637484913, "tokens_seen": 1998585856 }, { "epoch": 0.61, "objective/train/advantage_avg": 1.4012218343850691e-05, "objective/train/docs_used": 728818, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3711408376693726, "objective/train/original_loss": 1.371140718460083, "objective/train/theoretical_loss": 3.459023443906138, "objective/train/tokens_used": 2019308000, "objective/train/value_avg": -0.0090179443359375, "objective/train/value_loss": 0.0004112530150450766, "objective/train/value_max": -6.556510925292969e-05, "objective/train/value_min": -0.98876953125, "objective/train/value_reward_corr": 0.7207536114861102, "objective/train/value_std": 0.0240020751953125, "objective/train/weight_avg": 1.0002104043960571, "objective/train/weighted_lm_loss": 1.3707592487335205, "objective/train/weights_max": 2.44260835647583, "objective/train/weights_min": 0.229869082570076, "theoretical_loss": 3.459023443906138, "tokens_seen": 1998848000 }, { "epoch": 0.61, "learning_rate": 0.0003982506820735035, "loss": 0.0686, "theoretical_loss": 3.459023443906138, "tokens_seen": 1998848000 }, { "epoch": 0.61, "learning_rate": 0.00039817043813192103, "loss": 0.0677, "theoretical_loss": 3.458985930361668, "tokens_seen": 1999110144 }, { "epoch": 0.61, "learning_rate": 0.00039809019419033865, "loss": 0.0679, "theoretical_loss": 3.4589484231132, "tokens_seen": 1999372288 }, { "epoch": 0.61, "learning_rate": 0.00039800995024875626, "loss": 0.0668, "theoretical_loss": 3.4589109221588514, "tokens_seen": 1999634432 }, { "epoch": 0.61, "learning_rate": 0.0003979297063071738, "loss": 0.072, "theoretical_loss": 3.4588734274967416, "tokens_seen": 1999896576 }, { "epoch": 0.61, "learning_rate": 0.0003978494623655914, "loss": 0.0699, "theoretical_loss": 3.4588359391249894, "tokens_seen": 2000158720 }, { "epoch": 0.61, "learning_rate": 0.000397769218424009, "loss": 0.068, "theoretical_loss": 3.458798457041716, "tokens_seen": 2000420864 }, { "epoch": 0.61, "learning_rate": 0.0003976889744824266, "loss": 0.0683, "theoretical_loss": 3.4587609812450424, "tokens_seen": 2000683008 }, { "epoch": 0.61, "learning_rate": 0.00039760873054084416, "loss": 0.0691, "theoretical_loss": 3.4587235117330906, "tokens_seen": 2000945152 }, { "epoch": 0.61, "learning_rate": 0.00039752848659926177, "loss": 0.0663, "theoretical_loss": 3.4586860485039836, "tokens_seen": 2001207296 }, { "epoch": 0.61, "learning_rate": 0.0003974482426576794, "loss": 0.0684, "theoretical_loss": 3.4586485915558454, "tokens_seen": 2001469440 }, { "epoch": 0.61, "learning_rate": 0.00039736799871609694, "loss": 0.0697, "theoretical_loss": 3.4586111408868, "tokens_seen": 2001731584 }, { "epoch": 0.61, "learning_rate": 0.0003972877547745145, "loss": 0.0687, "theoretical_loss": 3.4585736964949727, "tokens_seen": 2001993728 }, { "epoch": 0.61, "learning_rate": 0.0003972075108329321, "loss": 0.0665, "theoretical_loss": 3.45853625837849, "tokens_seen": 2002255872 }, { "epoch": 0.61, "learning_rate": 0.0003971272668913497, "loss": 0.0666, "theoretical_loss": 3.458498826535479, "tokens_seen": 2002518016 }, { "epoch": 0.61, "learning_rate": 0.0003970470229497673, "loss": 0.0699, "theoretical_loss": 3.4584614009640666, "tokens_seen": 2002780160 }, { "epoch": 0.61, "learning_rate": 0.0003969667790081849, "loss": 0.0666, "theoretical_loss": 3.4584239816623823, "tokens_seen": 2003042304 }, { "epoch": 0.61, "learning_rate": 0.0003968865350666025, "loss": 0.0688, "theoretical_loss": 3.4583865686285544, "tokens_seen": 2003304448 }, { "epoch": 0.61, "learning_rate": 0.00039680629112502007, "loss": 0.0705, "theoretical_loss": 3.4583491618607134, "tokens_seen": 2003566592 }, { "epoch": 0.61, "learning_rate": 0.00039672604718343763, "loss": 0.0687, "theoretical_loss": 3.458311761356991, "tokens_seen": 2003828736 }, { "epoch": 0.61, "learning_rate": 0.00039664580324185524, "loss": 0.0695, "theoretical_loss": 3.4582743671155183, "tokens_seen": 2004090880 }, { "epoch": 0.61, "learning_rate": 0.00039656555930027285, "loss": 0.0692, "theoretical_loss": 3.458236979134428, "tokens_seen": 2004353024 }, { "epoch": 0.61, "learning_rate": 0.0003964853153586904, "loss": 0.0669, "theoretical_loss": 3.458199597411853, "tokens_seen": 2004615168 }, { "epoch": 0.61, "learning_rate": 0.000396405071417108, "loss": 0.069, "theoretical_loss": 3.4581622219459276, "tokens_seen": 2004877312 }, { "epoch": 0.61, "learning_rate": 0.00039632482747552564, "loss": 0.0657, "theoretical_loss": 3.4581248527347874, "tokens_seen": 2005139456 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.0006999548641033471, "objective/train/docs_used": 731237, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3625905513763428, "objective/train/original_loss": 1.3625905513763428, "objective/train/theoretical_loss": 3.458087489776567, "objective/train/tokens_used": 2025861600, "objective/train/value_avg": -0.0076751708984375, "objective/train/value_loss": 0.0002253977581858635, "objective/train/value_max": -2.9981136322021484e-05, "objective/train/value_min": -0.70458984375, "objective/train/value_reward_corr": 0.7154743835992182, "objective/train/value_std": 0.016448974609375, "objective/train/weight_avg": 1.0008041858673096, "objective/train/weighted_lm_loss": 1.363987684249878, "objective/train/weights_max": 1.384639024734497, "objective/train/weights_min": 0.3804037272930145, "theoretical_loss": 3.458087489776567, "tokens_seen": 2005401600 }, { "epoch": 0.61, "learning_rate": 0.0003962445835339432, "loss": 0.0674, "theoretical_loss": 3.458087489776567, "tokens_seen": 2005401600 }, { "epoch": 0.61, "learning_rate": 0.0003961643395923608, "loss": 0.0677, "theoretical_loss": 3.458050133069404, "tokens_seen": 2005663744 }, { "epoch": 0.61, "learning_rate": 0.00039608409565077837, "loss": 0.0685, "theoretical_loss": 3.4580127826114353, "tokens_seen": 2005925888 }, { "epoch": 0.61, "learning_rate": 0.0003960038517091959, "loss": 0.0672, "theoretical_loss": 3.457975438400799, "tokens_seen": 2006188032 }, { "epoch": 0.61, "learning_rate": 0.00039592360776761354, "loss": 0.0655, "theoretical_loss": 3.4579381004356344, "tokens_seen": 2006450176 }, { "epoch": 0.61, "learning_rate": 0.00039584336382603115, "loss": 0.0684, "theoretical_loss": 3.4579007687140804, "tokens_seen": 2006712320 }, { "epoch": 0.61, "learning_rate": 0.00039576311988444876, "loss": 0.0702, "theoretical_loss": 3.4578634432342783, "tokens_seen": 2006974464 }, { "epoch": 0.61, "learning_rate": 0.0003956828759428663, "loss": 0.0702, "theoretical_loss": 3.4578261239943693, "tokens_seen": 2007236608 }, { "epoch": 0.61, "learning_rate": 0.00039560263200128393, "loss": 0.0704, "theoretical_loss": 3.4577888109924952, "tokens_seen": 2007498752 }, { "epoch": 0.61, "learning_rate": 0.0003955223880597015, "loss": 0.0688, "theoretical_loss": 3.4577515042267994, "tokens_seen": 2007760896 }, { "epoch": 0.61, "learning_rate": 0.00039544214411811905, "loss": 0.0655, "theoretical_loss": 3.457714203695425, "tokens_seen": 2008023040 }, { "epoch": 0.61, "learning_rate": 0.00039536190017653666, "loss": 0.067, "theoretical_loss": 3.4576769093965174, "tokens_seen": 2008285184 }, { "epoch": 0.61, "learning_rate": 0.0003952816562349543, "loss": 0.0681, "theoretical_loss": 3.4576396213282212, "tokens_seen": 2008547328 }, { "epoch": 0.61, "learning_rate": 0.0003952014122933719, "loss": 0.0703, "theoretical_loss": 3.457602339488682, "tokens_seen": 2008809472 }, { "epoch": 0.61, "learning_rate": 0.00039512116835178945, "loss": 0.0683, "theoretical_loss": 3.4575650638760482, "tokens_seen": 2009071616 }, { "epoch": 0.61, "learning_rate": 0.00039504092441020706, "loss": 0.0687, "theoretical_loss": 3.457527794488466, "tokens_seen": 2009333760 }, { "epoch": 0.61, "learning_rate": 0.0003949606804686246, "loss": 0.0685, "theoretical_loss": 3.457490531324085, "tokens_seen": 2009595904 }, { "epoch": 0.61, "learning_rate": 0.0003948804365270422, "loss": 0.0662, "theoretical_loss": 3.4574532743810535, "tokens_seen": 2009858048 }, { "epoch": 0.61, "learning_rate": 0.0003948001925854598, "loss": 0.0674, "theoretical_loss": 3.4574160236575224, "tokens_seen": 2010120192 }, { "epoch": 0.61, "learning_rate": 0.0003947199486438774, "loss": 0.0686, "theoretical_loss": 3.457378779151642, "tokens_seen": 2010382336 }, { "epoch": 0.61, "learning_rate": 0.000394639704702295, "loss": 0.0674, "theoretical_loss": 3.457341540861564, "tokens_seen": 2010644480 }, { "epoch": 0.61, "learning_rate": 0.00039455946076071257, "loss": 0.0667, "theoretical_loss": 3.4573043087854414, "tokens_seen": 2010906624 }, { "epoch": 0.61, "learning_rate": 0.0003944792168191302, "loss": 0.0687, "theoretical_loss": 3.4572670829214265, "tokens_seen": 2011168768 }, { "epoch": 0.61, "learning_rate": 0.0003943989728775478, "loss": 0.0683, "theoretical_loss": 3.457229863267674, "tokens_seen": 2011430912 }, { "epoch": 0.61, "learning_rate": 0.0003943187289359653, "loss": 0.0678, "theoretical_loss": 3.457192649822338, "tokens_seen": 2011693056 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.0003829321067314595, "objective/train/docs_used": 733698, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4594643115997314, "objective/train/original_loss": 1.4594643115997314, "objective/train/theoretical_loss": 3.4571554425835753, "objective/train/tokens_used": 2032415200, "objective/train/value_avg": -0.00807952880859375, "objective/train/value_loss": 0.0001753742399159819, "objective/train/value_max": -2.6285648345947266e-05, "objective/train/value_min": -0.81005859375, "objective/train/value_reward_corr": 0.7278631297172813, "objective/train/value_std": 0.0141754150390625, "objective/train/weight_avg": 1.0004661083221436, "objective/train/weighted_lm_loss": 1.4600026607513428, "objective/train/weights_max": 1.5876256227493286, "objective/train/weights_min": 0.3781697452068329, "theoretical_loss": 3.4571554425835753, "tokens_seen": 2011955200 }, { "epoch": 0.61, "learning_rate": 0.0003942384849943829, "loss": 0.0698, "theoretical_loss": 3.4571554425835753, "tokens_seen": 2011955200 }, { "epoch": 0.61, "learning_rate": 0.0003941582410528005, "loss": 0.0688, "theoretical_loss": 3.457118241549541, "tokens_seen": 2012217344 }, { "epoch": 0.61, "learning_rate": 0.00039407799711121814, "loss": 0.0698, "theoretical_loss": 3.4570810467183932, "tokens_seen": 2012479488 }, { "epoch": 0.61, "learning_rate": 0.0003939977531696357, "loss": 0.0696, "theoretical_loss": 3.45704385808829, "tokens_seen": 2012741632 }, { "epoch": 0.61, "learning_rate": 0.0003939175092280533, "loss": 0.0704, "theoretical_loss": 3.4570066756573885, "tokens_seen": 2013003776 }, { "epoch": 0.61, "learning_rate": 0.0003938372652864709, "loss": 0.0675, "theoretical_loss": 3.45696949942385, "tokens_seen": 2013265920 }, { "epoch": 0.61, "learning_rate": 0.0003937570213448884, "loss": 0.0683, "theoretical_loss": 3.4569323293858334, "tokens_seen": 2013528064 }, { "epoch": 0.61, "learning_rate": 0.00039367677740330604, "loss": 0.0678, "theoretical_loss": 3.4568951655415017, "tokens_seen": 2013790208 }, { "epoch": 0.61, "learning_rate": 0.00039359653346172365, "loss": 0.0664, "theoretical_loss": 3.4568580078890143, "tokens_seen": 2014052352 }, { "epoch": 0.61, "learning_rate": 0.0003935162895201412, "loss": 0.0694, "theoretical_loss": 3.4568208564265364, "tokens_seen": 2014314496 }, { "epoch": 0.61, "learning_rate": 0.0003934360455785588, "loss": 0.0677, "theoretical_loss": 3.4567837111522293, "tokens_seen": 2014576640 }, { "epoch": 0.61, "learning_rate": 0.00039335580163697644, "loss": 0.0675, "theoretical_loss": 3.456746572064259, "tokens_seen": 2014838784 }, { "epoch": 0.61, "learning_rate": 0.00039327555769539405, "loss": 0.0676, "theoretical_loss": 3.456709439160789, "tokens_seen": 2015100928 }, { "epoch": 0.61, "learning_rate": 0.0003931953137538116, "loss": 0.0678, "theoretical_loss": 3.456672312439986, "tokens_seen": 2015363072 }, { "epoch": 0.61, "learning_rate": 0.00039311506981222916, "loss": 0.0679, "theoretical_loss": 3.4566351919000167, "tokens_seen": 2015625216 }, { "epoch": 0.61, "learning_rate": 0.0003930348258706468, "loss": 0.0649, "theoretical_loss": 3.4565980775390477, "tokens_seen": 2015887360 }, { "epoch": 0.61, "learning_rate": 0.00039295458192906434, "loss": 0.0688, "theoretical_loss": 3.456560969355248, "tokens_seen": 2016149504 }, { "epoch": 0.61, "learning_rate": 0.00039287433798748195, "loss": 0.0673, "theoretical_loss": 3.456523867346786, "tokens_seen": 2016411648 }, { "epoch": 0.61, "learning_rate": 0.00039279409404589956, "loss": 0.0654, "theoretical_loss": 3.4564867715118313, "tokens_seen": 2016673792 }, { "epoch": 0.61, "learning_rate": 0.0003927138501043172, "loss": 0.0686, "theoretical_loss": 3.4564496818485546, "tokens_seen": 2016935936 }, { "epoch": 0.61, "learning_rate": 0.00039263360616273473, "loss": 0.0693, "theoretical_loss": 3.4564125983551275, "tokens_seen": 2017198080 }, { "epoch": 0.61, "learning_rate": 0.0003925533622211523, "loss": 0.0696, "theoretical_loss": 3.4563755210297216, "tokens_seen": 2017460224 }, { "epoch": 0.61, "learning_rate": 0.0003924731182795699, "loss": 0.0685, "theoretical_loss": 3.4563384498705094, "tokens_seen": 2017722368 }, { "epoch": 0.61, "learning_rate": 0.00039239287433798746, "loss": 0.0679, "theoretical_loss": 3.456301384875666, "tokens_seen": 2017984512 }, { "epoch": 0.61, "learning_rate": 0.0003923126303964051, "loss": 0.0666, "theoretical_loss": 3.456264326043364, "tokens_seen": 2018246656 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.00186018249951303, "objective/train/docs_used": 736110, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.349985957145691, "objective/train/original_loss": 1.3499860763549805, "objective/train/theoretical_loss": 3.4562272733717796, "objective/train/tokens_used": 2038968800, "objective/train/value_avg": -0.01027679443359375, "objective/train/value_loss": 0.00021227422985248268, "objective/train/value_max": -5.4776668548583984e-05, "objective/train/value_min": -0.449462890625, "objective/train/value_reward_corr": 0.785820240253668, "objective/train/value_std": 0.0207061767578125, "objective/train/weight_avg": 1.0019619464874268, "objective/train/weighted_lm_loss": 1.3521424531936646, "objective/train/weights_max": 1.5077892541885376, "objective/train/weights_min": 0.3687315881252289, "theoretical_loss": 3.4562272733717796, "tokens_seen": 2018508800 }, { "epoch": 0.61, "learning_rate": 0.0003922323864548227, "loss": 0.0684, "theoretical_loss": 3.4562272733717796, "tokens_seen": 2018508800 }, { "epoch": 0.61, "learning_rate": 0.0003921521425132403, "loss": 0.068, "theoretical_loss": 3.4561902268590883, "tokens_seen": 2018770944 }, { "epoch": 0.61, "learning_rate": 0.00039207189857165786, "loss": 0.0674, "theoretical_loss": 3.4561531865034665, "tokens_seen": 2019033088 }, { "epoch": 0.61, "learning_rate": 0.0003919916546300754, "loss": 0.0654, "theoretical_loss": 3.4561161523030925, "tokens_seen": 2019295232 }, { "epoch": 0.61, "learning_rate": 0.00039191141068849303, "loss": 0.0672, "theoretical_loss": 3.456079124256145, "tokens_seen": 2019557376 }, { "epoch": 0.61, "learning_rate": 0.0003918311667469106, "loss": 0.0706, "theoretical_loss": 3.4560421023608012, "tokens_seen": 2019819520 }, { "epoch": 0.61, "learning_rate": 0.0003917509228053282, "loss": 0.0705, "theoretical_loss": 3.4560050866152423, "tokens_seen": 2020081664 }, { "epoch": 0.61, "learning_rate": 0.0003916706788637458, "loss": 0.0665, "theoretical_loss": 3.455968077017649, "tokens_seen": 2020343808 }, { "epoch": 0.61, "learning_rate": 0.00039159043492216337, "loss": 0.0675, "theoretical_loss": 3.455931073566202, "tokens_seen": 2020605952 }, { "epoch": 0.61, "learning_rate": 0.000391510190980581, "loss": 0.0674, "theoretical_loss": 3.4558940762590837, "tokens_seen": 2020868096 }, { "epoch": 0.61, "learning_rate": 0.0003914299470389986, "loss": 0.0697, "theoretical_loss": 3.455857085094477, "tokens_seen": 2021130240 }, { "epoch": 0.61, "learning_rate": 0.00039134970309741615, "loss": 0.066, "theoretical_loss": 3.4558201000705653, "tokens_seen": 2021392384 }, { "epoch": 0.61, "learning_rate": 0.0003912694591558337, "loss": 0.0701, "theoretical_loss": 3.455783121185534, "tokens_seen": 2021654528 }, { "epoch": 0.61, "learning_rate": 0.0003911892152142513, "loss": 0.0651, "theoretical_loss": 3.4557461484375676, "tokens_seen": 2021916672 }, { "epoch": 0.61, "learning_rate": 0.00039110897127266894, "loss": 0.0692, "theoretical_loss": 3.4557091818248518, "tokens_seen": 2022178816 }, { "epoch": 0.61, "learning_rate": 0.0003910287273310865, "loss": 0.0682, "theoretical_loss": 3.4556722213455737, "tokens_seen": 2022440960 }, { "epoch": 0.61, "learning_rate": 0.0003909484833895041, "loss": 0.0674, "theoretical_loss": 3.455635266997921, "tokens_seen": 2022703104 }, { "epoch": 0.61, "learning_rate": 0.0003908682394479217, "loss": 0.0664, "theoretical_loss": 3.4555983187800825, "tokens_seen": 2022965248 }, { "epoch": 0.61, "learning_rate": 0.0003907879955063393, "loss": 0.0682, "theoretical_loss": 3.455561376690246, "tokens_seen": 2023227392 }, { "epoch": 0.61, "learning_rate": 0.00039070775156475684, "loss": 0.0678, "theoretical_loss": 3.4555244407266024, "tokens_seen": 2023489536 }, { "epoch": 0.61, "learning_rate": 0.00039062750762317445, "loss": 0.0688, "theoretical_loss": 3.455487510887342, "tokens_seen": 2023751680 }, { "epoch": 0.61, "learning_rate": 0.00039054726368159206, "loss": 0.0689, "theoretical_loss": 3.455450587170656, "tokens_seen": 2024013824 }, { "epoch": 0.61, "learning_rate": 0.0003904670197400096, "loss": 0.0705, "theoretical_loss": 3.455413669574737, "tokens_seen": 2024275968 }, { "epoch": 0.61, "learning_rate": 0.00039038677579842723, "loss": 0.068, "theoretical_loss": 3.4553767580977777, "tokens_seen": 2024538112 }, { "epoch": 0.61, "learning_rate": 0.00039030653185684485, "loss": 0.0681, "theoretical_loss": 3.4553398527379717, "tokens_seen": 2024800256 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.0008612438105046749, "objective/train/docs_used": 738608, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2844445705413818, "objective/train/original_loss": 1.2844444513320923, "objective/train/theoretical_loss": 3.4553029534935136, "objective/train/tokens_used": 2045522400, "objective/train/value_avg": -0.00919342041015625, "objective/train/value_loss": 0.00018717643979471177, "objective/train/value_max": -5.7816505432128906e-05, "objective/train/value_min": -0.8095703125, "objective/train/value_reward_corr": 0.85092619597115, "objective/train/value_std": 0.0222930908203125, "objective/train/weight_avg": 1.0009496212005615, "objective/train/weighted_lm_loss": 1.2855502367019653, "objective/train/weights_max": 1.3055201768875122, "objective/train/weights_min": 0.3694390654563904, "theoretical_loss": 3.4553029534935136, "tokens_seen": 2025062400 }, { "epoch": 0.61, "learning_rate": 0.0003902262879152624, "loss": 0.0669, "theoretical_loss": 3.4553029534935136, "tokens_seen": 2025062400 }, { "epoch": 0.61, "learning_rate": 0.00039014604397367996, "loss": 0.0669, "theoretical_loss": 3.4552660603625984, "tokens_seen": 2025324544 }, { "epoch": 0.61, "learning_rate": 0.0003900658000320976, "loss": 0.067, "theoretical_loss": 3.455229173343423, "tokens_seen": 2025586688 }, { "epoch": 0.61, "learning_rate": 0.0003899855560905152, "loss": 0.068, "theoretical_loss": 3.455192292434183, "tokens_seen": 2025848832 }, { "epoch": 0.61, "learning_rate": 0.00038990531214893275, "loss": 0.0657, "theoretical_loss": 3.455155417633076, "tokens_seen": 2026110976 }, { "epoch": 0.61, "learning_rate": 0.00038982506820735036, "loss": 0.0652, "theoretical_loss": 3.4551185489383007, "tokens_seen": 2026373120 }, { "epoch": 0.61, "learning_rate": 0.00038974482426576797, "loss": 0.0675, "theoretical_loss": 3.4550816863480565, "tokens_seen": 2026635264 }, { "epoch": 0.61, "learning_rate": 0.00038966458032418553, "loss": 0.0678, "theoretical_loss": 3.455044829860543, "tokens_seen": 2026897408 }, { "epoch": 0.61, "learning_rate": 0.0003895843363826031, "loss": 0.0691, "theoretical_loss": 3.45500797947396, "tokens_seen": 2027159552 }, { "epoch": 0.61, "learning_rate": 0.0003895040924410207, "loss": 0.0679, "theoretical_loss": 3.45497113518651, "tokens_seen": 2027421696 }, { "epoch": 0.61, "learning_rate": 0.0003894238484994383, "loss": 0.0699, "theoretical_loss": 3.4549342969963943, "tokens_seen": 2027683840 }, { "epoch": 0.61, "learning_rate": 0.0003893436045578559, "loss": 0.0678, "theoretical_loss": 3.4548974649018165, "tokens_seen": 2027945984 }, { "epoch": 0.61, "learning_rate": 0.0003892633606162735, "loss": 0.0684, "theoretical_loss": 3.4548606389009793, "tokens_seen": 2028208128 }, { "epoch": 0.61, "learning_rate": 0.0003891831166746911, "loss": 0.0654, "theoretical_loss": 3.4548238189920877, "tokens_seen": 2028470272 }, { "epoch": 0.61, "learning_rate": 0.00038910287273310866, "loss": 0.0661, "theoretical_loss": 3.4547870051733467, "tokens_seen": 2028732416 }, { "epoch": 0.61, "learning_rate": 0.0003890226287915262, "loss": 0.0674, "theoretical_loss": 3.4547501974429626, "tokens_seen": 2028994560 }, { "epoch": 0.61, "learning_rate": 0.00038894238484994383, "loss": 0.066, "theoretical_loss": 3.454713395799142, "tokens_seen": 2029256704 }, { "epoch": 0.62, "learning_rate": 0.00038886214090836144, "loss": 0.068, "theoretical_loss": 3.4546766002400915, "tokens_seen": 2029518848 }, { "epoch": 0.62, "learning_rate": 0.000388781896966779, "loss": 0.0674, "theoretical_loss": 3.4546398107640197, "tokens_seen": 2029780992 }, { "epoch": 0.62, "learning_rate": 0.0003887016530251966, "loss": 0.0701, "theoretical_loss": 3.4546030273691364, "tokens_seen": 2030043136 }, { "epoch": 0.62, "learning_rate": 0.0003886214090836142, "loss": 0.0659, "theoretical_loss": 3.4545662500536505, "tokens_seen": 2030305280 }, { "epoch": 0.62, "learning_rate": 0.0003885411651420318, "loss": 0.071, "theoretical_loss": 3.4545294788157728, "tokens_seen": 2030567424 }, { "epoch": 0.62, "learning_rate": 0.00038846092120044934, "loss": 0.0687, "theoretical_loss": 3.454492713653714, "tokens_seen": 2030829568 }, { "epoch": 0.62, "learning_rate": 0.00038838067725886695, "loss": 0.0693, "theoretical_loss": 3.4544559545656863, "tokens_seen": 2031091712 }, { "epoch": 0.62, "learning_rate": 0.00038830043331728457, "loss": 0.0663, "theoretical_loss": 3.4544192015499027, "tokens_seen": 2031353856 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.0007523357635363936, "objective/train/docs_used": 741020, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3411290645599365, "objective/train/original_loss": 1.341128945350647, "objective/train/theoretical_loss": 3.454382454604577, "objective/train/tokens_used": 2052076000, "objective/train/value_avg": -0.007038116455078125, "objective/train/value_loss": 0.00034059895551763475, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.73388671875, "objective/train/value_reward_corr": 0.7047397016521502, "objective/train/value_std": 0.0164031982421875, "objective/train/weight_avg": 1.0008991956710815, "objective/train/weighted_lm_loss": 1.3422584533691406, "objective/train/weights_max": 1.5874963998794556, "objective/train/weights_min": 0.36873507499694824, "theoretical_loss": 3.454382454604577, "tokens_seen": 2031616000 }, { "epoch": 0.62, "learning_rate": 0.0003882201893757021, "loss": 0.0668, "theoretical_loss": 3.454382454604577, "tokens_seen": 2031616000 }, { "epoch": 0.62, "learning_rate": 0.00038813994543411974, "loss": 0.0666, "theoretical_loss": 3.454345713727923, "tokens_seen": 2031878144 }, { "epoch": 0.62, "learning_rate": 0.00038805970149253735, "loss": 0.0685, "theoretical_loss": 3.4543089789181556, "tokens_seen": 2032140288 }, { "epoch": 0.62, "learning_rate": 0.0003879794575509549, "loss": 0.0661, "theoretical_loss": 3.4542722501734904, "tokens_seen": 2032402432 }, { "epoch": 0.62, "learning_rate": 0.0003878992136093725, "loss": 0.0681, "theoretical_loss": 3.454235527492145, "tokens_seen": 2032664576 }, { "epoch": 0.62, "learning_rate": 0.0003878189696677901, "loss": 0.0645, "theoretical_loss": 3.4541988108723354, "tokens_seen": 2032926720 }, { "epoch": 0.62, "learning_rate": 0.00038773872572620764, "loss": 0.0698, "theoretical_loss": 3.4541621003122804, "tokens_seen": 2033188864 }, { "epoch": 0.62, "learning_rate": 0.00038765848178462525, "loss": 0.0707, "theoretical_loss": 3.4541253958101983, "tokens_seen": 2033451008 }, { "epoch": 0.62, "learning_rate": 0.00038757823784304286, "loss": 0.0679, "theoretical_loss": 3.454088697364309, "tokens_seen": 2033713152 }, { "epoch": 0.62, "learning_rate": 0.0003874979939014605, "loss": 0.0651, "theoretical_loss": 3.454052004972833, "tokens_seen": 2033975296 }, { "epoch": 0.62, "learning_rate": 0.00038741774995987803, "loss": 0.068, "theoretical_loss": 3.4540153186339912, "tokens_seen": 2034237440 }, { "epoch": 0.62, "learning_rate": 0.00038733750601829565, "loss": 0.0685, "theoretical_loss": 3.4539786383460047, "tokens_seen": 2034499584 }, { "epoch": 0.62, "learning_rate": 0.0003872572620767132, "loss": 0.0667, "theoretical_loss": 3.4539419641070968, "tokens_seen": 2034761728 }, { "epoch": 0.62, "learning_rate": 0.00038717701813513076, "loss": 0.0676, "theoretical_loss": 3.4539052959154906, "tokens_seen": 2035023872 }, { "epoch": 0.62, "learning_rate": 0.0003870967741935484, "loss": 0.0689, "theoretical_loss": 3.4538686337694102, "tokens_seen": 2035286016 }, { "epoch": 0.62, "learning_rate": 0.000387016530251966, "loss": 0.0687, "theoretical_loss": 3.45383197766708, "tokens_seen": 2035548160 }, { "epoch": 0.62, "learning_rate": 0.0003869362863103836, "loss": 0.0644, "theoretical_loss": 3.453795327606726, "tokens_seen": 2035810304 }, { "epoch": 0.62, "learning_rate": 0.00038685604236880116, "loss": 0.0687, "theoretical_loss": 3.4537586835865746, "tokens_seen": 2036072448 }, { "epoch": 0.62, "learning_rate": 0.00038677579842721877, "loss": 0.0698, "theoretical_loss": 3.4537220456048523, "tokens_seen": 2036334592 }, { "epoch": 0.62, "learning_rate": 0.0003866955544856364, "loss": 0.0683, "theoretical_loss": 3.453685413659788, "tokens_seen": 2036596736 }, { "epoch": 0.62, "learning_rate": 0.0003866153105440539, "loss": 0.0682, "theoretical_loss": 3.4536487877496085, "tokens_seen": 2036858880 }, { "epoch": 0.62, "learning_rate": 0.0003865350666024715, "loss": 0.0695, "theoretical_loss": 3.4536121678725444, "tokens_seen": 2037121024 }, { "epoch": 0.62, "learning_rate": 0.0003864548226608891, "loss": 0.0677, "theoretical_loss": 3.453575554026825, "tokens_seen": 2037383168 }, { "epoch": 0.62, "learning_rate": 0.0003863745787193067, "loss": 0.0652, "theoretical_loss": 3.4535389462106822, "tokens_seen": 2037645312 }, { "epoch": 0.62, "learning_rate": 0.0003862943347777243, "loss": 0.0686, "theoretical_loss": 3.4535023444223465, "tokens_seen": 2037907456 }, { "epoch": 0.62, "objective/train/advantage_avg": -0.00029666503542102873, "objective/train/docs_used": 743253, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3327980041503906, "objective/train/original_loss": 1.3327980041503906, "objective/train/theoretical_loss": 3.453465748660051, "objective/train/tokens_used": 2058629600, "objective/train/value_avg": -0.007541656494140625, "objective/train/value_loss": 0.0001606981095392257, "objective/train/value_max": -4.5418739318847656e-05, "objective/train/value_min": -0.45166015625, "objective/train/value_reward_corr": 0.7653297176662357, "objective/train/value_std": 0.01525115966796875, "objective/train/weight_avg": 0.9997789859771729, "objective/train/weighted_lm_loss": 1.3319969177246094, "objective/train/weights_max": 1.4667052030563354, "objective/train/weights_min": 0.3728525638580322, "theoretical_loss": 3.453465748660051, "tokens_seen": 2038169600 }, { "epoch": 0.62, "learning_rate": 0.0003862140908361419, "loss": 0.0676, "theoretical_loss": 3.453465748660051, "tokens_seen": 2038169600 }, { "epoch": 0.62, "learning_rate": 0.0003861338468945595, "loss": 0.0671, "theoretical_loss": 3.4534291589220274, "tokens_seen": 2038431744 }, { "epoch": 0.62, "learning_rate": 0.000386053602952977, "loss": 0.0685, "theoretical_loss": 3.4533925752065104, "tokens_seen": 2038693888 }, { "epoch": 0.62, "learning_rate": 0.00038597335901139463, "loss": 0.0684, "theoretical_loss": 3.4533559975117347, "tokens_seen": 2038956032 }, { "epoch": 0.62, "learning_rate": 0.00038589311506981224, "loss": 0.0697, "theoretical_loss": 3.453319425835935, "tokens_seen": 2039218176 }, { "epoch": 0.62, "learning_rate": 0.00038581287112822985, "loss": 0.0667, "theoretical_loss": 3.4532828601773478, "tokens_seen": 2039480320 }, { "epoch": 0.62, "learning_rate": 0.0003857326271866474, "loss": 0.0664, "theoretical_loss": 3.453246300534209, "tokens_seen": 2039742464 }, { "epoch": 0.62, "learning_rate": 0.000385652383245065, "loss": 0.0695, "theoretical_loss": 3.4532097469047573, "tokens_seen": 2040004608 }, { "epoch": 0.62, "learning_rate": 0.00038557213930348264, "loss": 0.0683, "theoretical_loss": 3.4531731992872303, "tokens_seen": 2040266752 }, { "epoch": 0.62, "learning_rate": 0.00038549189536190014, "loss": 0.0668, "theoretical_loss": 3.4531366576798668, "tokens_seen": 2040528896 }, { "epoch": 0.62, "learning_rate": 0.00038541165142031775, "loss": 0.0699, "theoretical_loss": 3.4531001220809068, "tokens_seen": 2040791040 }, { "epoch": 0.62, "learning_rate": 0.00038533140747873537, "loss": 0.0684, "theoretical_loss": 3.453063592488591, "tokens_seen": 2041053184 }, { "epoch": 0.62, "learning_rate": 0.0003852511635371529, "loss": 0.0663, "theoretical_loss": 3.4530270689011595, "tokens_seen": 2041315328 }, { "epoch": 0.62, "learning_rate": 0.00038517091959557054, "loss": 0.0692, "theoretical_loss": 3.4529905513168555, "tokens_seen": 2041577472 }, { "epoch": 0.62, "learning_rate": 0.00038509067565398815, "loss": 0.0675, "theoretical_loss": 3.452954039733921, "tokens_seen": 2041839616 }, { "epoch": 0.62, "learning_rate": 0.00038501043171240576, "loss": 0.0653, "theoretical_loss": 3.4529175341505995, "tokens_seen": 2042101760 }, { "epoch": 0.62, "learning_rate": 0.0003849301877708233, "loss": 0.0675, "theoretical_loss": 3.4528810345651357, "tokens_seen": 2042363904 }, { "epoch": 0.62, "learning_rate": 0.0003848499438292409, "loss": 0.0677, "theoretical_loss": 3.4528445409757738, "tokens_seen": 2042626048 }, { "epoch": 0.62, "learning_rate": 0.0003847696998876585, "loss": 0.0692, "theoretical_loss": 3.45280805338076, "tokens_seen": 2042888192 }, { "epoch": 0.62, "learning_rate": 0.00038468945594607605, "loss": 0.0658, "theoretical_loss": 3.45277157177834, "tokens_seen": 2043150336 }, { "epoch": 0.62, "learning_rate": 0.00038460921200449366, "loss": 0.0664, "theoretical_loss": 3.4527350961667613, "tokens_seen": 2043412480 }, { "epoch": 0.62, "learning_rate": 0.0003845289680629113, "loss": 0.0669, "theoretical_loss": 3.452698626544272, "tokens_seen": 2043674624 }, { "epoch": 0.62, "learning_rate": 0.0003844487241213289, "loss": 0.0674, "theoretical_loss": 3.45266216290912, "tokens_seen": 2043936768 }, { "epoch": 0.62, "learning_rate": 0.00038436848017974645, "loss": 0.0681, "theoretical_loss": 3.452625705259556, "tokens_seen": 2044198912 }, { "epoch": 0.62, "learning_rate": 0.000384288236238164, "loss": 0.0686, "theoretical_loss": 3.4525892535938283, "tokens_seen": 2044461056 }, { "epoch": 0.62, "objective/train/advantage_avg": 4.471498687053099e-05, "objective/train/docs_used": 745676, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2721974849700928, "objective/train/original_loss": 1.2721973657608032, "objective/train/theoretical_loss": 3.4525528079101884, "objective/train/tokens_used": 2065183200, "objective/train/value_avg": -0.006793975830078125, "objective/train/value_loss": 0.00025227837613783777, "objective/train/value_max": -4.398822784423828e-05, "objective/train/value_min": -0.92236328125, "objective/train/value_reward_corr": 0.6778463347287009, "objective/train/value_std": 0.0141448974609375, "objective/train/weight_avg": 1.0001567602157593, "objective/train/weighted_lm_loss": 1.272079348564148, "objective/train/weights_max": 1.4578360319137573, "objective/train/weights_min": 0.3682824373245239, "theoretical_loss": 3.4525528079101884, "tokens_seen": 2044723200 }, { "epoch": 0.62, "learning_rate": 0.0003842079922965816, "loss": 0.068, "theoretical_loss": 3.4525528079101884, "tokens_seen": 2044723200 }, { "epoch": 0.62, "learning_rate": 0.0003841277483549992, "loss": 0.0681, "theoretical_loss": 3.4525163682068882, "tokens_seen": 2044985344 }, { "epoch": 0.62, "learning_rate": 0.0003840475044134168, "loss": 0.0706, "theoretical_loss": 3.45247993448218, "tokens_seen": 2045247488 }, { "epoch": 0.62, "learning_rate": 0.0003839672604718344, "loss": 0.067, "theoretical_loss": 3.4524435067343164, "tokens_seen": 2045509632 }, { "epoch": 0.62, "learning_rate": 0.000383887016530252, "loss": 0.0643, "theoretical_loss": 3.452407084961551, "tokens_seen": 2045771776 }, { "epoch": 0.62, "learning_rate": 0.00038380677258866957, "loss": 0.0643, "theoretical_loss": 3.452370669162139, "tokens_seen": 2046033920 }, { "epoch": 0.62, "learning_rate": 0.00038372652864708713, "loss": 0.0678, "theoretical_loss": 3.452334259334335, "tokens_seen": 2046296064 }, { "epoch": 0.62, "learning_rate": 0.00038364628470550474, "loss": 0.0676, "theoretical_loss": 3.452297855476395, "tokens_seen": 2046558208 }, { "epoch": 0.62, "learning_rate": 0.0003835660407639223, "loss": 0.0668, "theoretical_loss": 3.4522614575865753, "tokens_seen": 2046820352 }, { "epoch": 0.62, "learning_rate": 0.0003834857968223399, "loss": 0.0646, "theoretical_loss": 3.452225065663134, "tokens_seen": 2047082496 }, { "epoch": 0.62, "learning_rate": 0.0003834055528807575, "loss": 0.0687, "theoretical_loss": 3.4521886797043293, "tokens_seen": 2047344640 }, { "epoch": 0.62, "learning_rate": 0.0003833253089391751, "loss": 0.0661, "theoretical_loss": 3.4521522997084197, "tokens_seen": 2047606784 }, { "epoch": 0.62, "learning_rate": 0.0003832450649975927, "loss": 0.0683, "theoretical_loss": 3.452115925673665, "tokens_seen": 2047868928 }, { "epoch": 0.62, "learning_rate": 0.0003831648210560103, "loss": 0.063, "theoretical_loss": 3.4520795575983247, "tokens_seen": 2048131072 }, { "epoch": 0.62, "learning_rate": 0.00038308457711442787, "loss": 0.071, "theoretical_loss": 3.4520431954806607, "tokens_seen": 2048393216 }, { "epoch": 0.62, "learning_rate": 0.0003830043331728454, "loss": 0.068, "theoretical_loss": 3.452006839318935, "tokens_seen": 2048655360 }, { "epoch": 0.62, "learning_rate": 0.00038292408923126304, "loss": 0.0687, "theoretical_loss": 3.45197048911141, "tokens_seen": 2048917504 }, { "epoch": 0.62, "learning_rate": 0.00038284384528968065, "loss": 0.0693, "theoretical_loss": 3.451934144856348, "tokens_seen": 2049179648 }, { "epoch": 0.62, "learning_rate": 0.0003827636013480982, "loss": 0.0681, "theoretical_loss": 3.451897806552014, "tokens_seen": 2049441792 }, { "epoch": 0.62, "learning_rate": 0.0003826833574065158, "loss": 0.0692, "theoretical_loss": 3.451861474196672, "tokens_seen": 2049703936 }, { "epoch": 0.62, "learning_rate": 0.00038260311346493344, "loss": 0.0686, "theoretical_loss": 3.4518251477885884, "tokens_seen": 2049966080 }, { "epoch": 0.62, "learning_rate": 0.000382522869523351, "loss": 0.0713, "theoretical_loss": 3.4517888273260287, "tokens_seen": 2050228224 }, { "epoch": 0.62, "learning_rate": 0.00038244262558176855, "loss": 0.0677, "theoretical_loss": 3.4517525128072593, "tokens_seen": 2050490368 }, { "epoch": 0.62, "learning_rate": 0.00038236238164018616, "loss": 0.0669, "theoretical_loss": 3.4517162042305483, "tokens_seen": 2050752512 }, { "epoch": 0.62, "learning_rate": 0.0003822821376986038, "loss": 0.0671, "theoretical_loss": 3.4516799015941646, "tokens_seen": 2051014656 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.0005698420573025942, "objective/train/docs_used": 748156, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3751286268234253, "objective/train/original_loss": 1.3751286268234253, "objective/train/theoretical_loss": 3.451643604896377, "objective/train/tokens_used": 2071736800, "objective/train/value_avg": -0.010772705078125, "objective/train/value_loss": 0.00031448592199012637, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.966796875, "objective/train/value_reward_corr": 0.7301633754645884, "objective/train/value_std": 0.0200653076171875, "objective/train/weight_avg": 1.0007132291793823, "objective/train/weighted_lm_loss": 1.3746947050094604, "objective/train/weights_max": 1.821274757385254, "objective/train/weights_min": 0.370111882686615, "theoretical_loss": 3.451643604896377, "tokens_seen": 2051276800 }, { "epoch": 0.62, "learning_rate": 0.00038220189375702134, "loss": 0.0682, "theoretical_loss": 3.451643604896377, "tokens_seen": 2051276800 }, { "epoch": 0.62, "learning_rate": 0.00038212164981543895, "loss": 0.0699, "theoretical_loss": 3.4516073141354546, "tokens_seen": 2051538944 }, { "epoch": 0.62, "learning_rate": 0.00038204140587385656, "loss": 0.0691, "theoretical_loss": 3.451571029309668, "tokens_seen": 2051801088 }, { "epoch": 0.62, "learning_rate": 0.0003819611619322742, "loss": 0.0666, "theoretical_loss": 3.4515347504172893, "tokens_seen": 2052063232 }, { "epoch": 0.62, "learning_rate": 0.0003818809179906917, "loss": 0.0686, "theoretical_loss": 3.4514984774565898, "tokens_seen": 2052325376 }, { "epoch": 0.62, "learning_rate": 0.0003818006740491093, "loss": 0.0676, "theoretical_loss": 3.4514622104258423, "tokens_seen": 2052587520 }, { "epoch": 0.62, "learning_rate": 0.0003817204301075269, "loss": 0.0655, "theoretical_loss": 3.451425949323321, "tokens_seen": 2052849664 }, { "epoch": 0.62, "learning_rate": 0.00038164018616594446, "loss": 0.0668, "theoretical_loss": 3.451389694147298, "tokens_seen": 2053111808 }, { "epoch": 0.62, "learning_rate": 0.0003815599422243621, "loss": 0.068, "theoretical_loss": 3.45135344489605, "tokens_seen": 2053373952 }, { "epoch": 0.62, "learning_rate": 0.0003814796982827797, "loss": 0.0691, "theoretical_loss": 3.4513172015678526, "tokens_seen": 2053636096 }, { "epoch": 0.62, "learning_rate": 0.00038139945434119725, "loss": 0.0651, "theoretical_loss": 3.451280964160981, "tokens_seen": 2053898240 }, { "epoch": 0.62, "learning_rate": 0.0003813192103996148, "loss": 0.0677, "theoretical_loss": 3.451244732673713, "tokens_seen": 2054160384 }, { "epoch": 0.62, "learning_rate": 0.0003812389664580324, "loss": 0.0657, "theoretical_loss": 3.451208507104326, "tokens_seen": 2054422528 }, { "epoch": 0.62, "learning_rate": 0.00038115872251645003, "loss": 0.0675, "theoretical_loss": 3.451172287451098, "tokens_seen": 2054684672 }, { "epoch": 0.62, "learning_rate": 0.0003810784785748676, "loss": 0.0697, "theoretical_loss": 3.45113607371231, "tokens_seen": 2054946816 }, { "epoch": 0.62, "learning_rate": 0.0003809982346332852, "loss": 0.0673, "theoretical_loss": 3.4510998658862397, "tokens_seen": 2055208960 }, { "epoch": 0.62, "learning_rate": 0.0003809179906917028, "loss": 0.0673, "theoretical_loss": 3.451063663971169, "tokens_seen": 2055471104 }, { "epoch": 0.62, "learning_rate": 0.00038083774675012037, "loss": 0.0684, "theoretical_loss": 3.4510274679653787, "tokens_seen": 2055733248 }, { "epoch": 0.62, "learning_rate": 0.00038075750280853793, "loss": 0.0686, "theoretical_loss": 3.4509912778671517, "tokens_seen": 2055995392 }, { "epoch": 0.62, "learning_rate": 0.00038067725886695554, "loss": 0.0665, "theoretical_loss": 3.4509550936747697, "tokens_seen": 2056257536 }, { "epoch": 0.62, "learning_rate": 0.00038059701492537315, "loss": 0.0697, "theoretical_loss": 3.4509189153865165, "tokens_seen": 2056519680 }, { "epoch": 0.62, "learning_rate": 0.0003805167709837907, "loss": 0.0683, "theoretical_loss": 3.450882743000677, "tokens_seen": 2056781824 }, { "epoch": 0.62, "learning_rate": 0.0003804365270422083, "loss": 0.0706, "theoretical_loss": 3.450846576515535, "tokens_seen": 2057043968 }, { "epoch": 0.62, "learning_rate": 0.00038035628310062594, "loss": 0.0682, "theoretical_loss": 3.4508104159293773, "tokens_seen": 2057306112 }, { "epoch": 0.62, "learning_rate": 0.0003802760391590435, "loss": 0.0696, "theoretical_loss": 3.4507742612404897, "tokens_seen": 2057568256 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.0001572015753481537, "objective/train/docs_used": 750571, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4363138675689697, "objective/train/original_loss": 1.4363138675689697, "objective/train/theoretical_loss": 3.4507381124471594, "objective/train/tokens_used": 2078290400, "objective/train/value_avg": -0.00695037841796875, "objective/train/value_loss": 0.00017691683024168015, "objective/train/value_max": -3.427267074584961e-05, "objective/train/value_min": -0.2410888671875, "objective/train/value_reward_corr": 0.6409099064873156, "objective/train/value_std": 0.01142120361328125, "objective/train/weight_avg": 1.000238060951233, "objective/train/weighted_lm_loss": 1.4368598461151123, "objective/train/weights_max": 1.2355972528457642, "objective/train/weights_min": 0.37141308188438416, "theoretical_loss": 3.4507381124471594, "tokens_seen": 2057830400 }, { "epoch": 0.62, "learning_rate": 0.0003801957952174611, "loss": 0.0687, "theoretical_loss": 3.4507381124471594, "tokens_seen": 2057830400 }, { "epoch": 0.62, "learning_rate": 0.00038011555127587867, "loss": 0.0676, "theoretical_loss": 3.4507019695476737, "tokens_seen": 2058092544 }, { "epoch": 0.62, "learning_rate": 0.0003800353073342963, "loss": 0.0685, "theoretical_loss": 3.450665832540322, "tokens_seen": 2058354688 }, { "epoch": 0.62, "learning_rate": 0.00037995506339271384, "loss": 0.0672, "theoretical_loss": 3.450629701423393, "tokens_seen": 2058616832 }, { "epoch": 0.62, "learning_rate": 0.00037987481945113145, "loss": 0.0705, "theoretical_loss": 3.4505935761951765, "tokens_seen": 2058878976 }, { "epoch": 0.62, "learning_rate": 0.00037979457550954906, "loss": 0.0681, "theoretical_loss": 3.4505574568539634, "tokens_seen": 2059141120 }, { "epoch": 0.62, "learning_rate": 0.0003797143315679666, "loss": 0.0665, "theoretical_loss": 3.4505213433980453, "tokens_seen": 2059403264 }, { "epoch": 0.62, "learning_rate": 0.00037963408762638423, "loss": 0.0699, "theoretical_loss": 3.450485235825714, "tokens_seen": 2059665408 }, { "epoch": 0.62, "learning_rate": 0.0003795538436848018, "loss": 0.067, "theoretical_loss": 3.450449134135262, "tokens_seen": 2059927552 }, { "epoch": 0.62, "learning_rate": 0.00037947359974321935, "loss": 0.0673, "theoretical_loss": 3.4504130383249834, "tokens_seen": 2060189696 }, { "epoch": 0.62, "learning_rate": 0.00037939335580163696, "loss": 0.0676, "theoretical_loss": 3.4503769483931723, "tokens_seen": 2060451840 }, { "epoch": 0.62, "learning_rate": 0.0003793131118600546, "loss": 0.0667, "theoretical_loss": 3.4503408643381235, "tokens_seen": 2060713984 }, { "epoch": 0.62, "learning_rate": 0.0003792328679184722, "loss": 0.066, "theoretical_loss": 3.4503047861581324, "tokens_seen": 2060976128 }, { "epoch": 0.62, "learning_rate": 0.00037915262397688975, "loss": 0.069, "theoretical_loss": 3.4502687138514956, "tokens_seen": 2061238272 }, { "epoch": 0.62, "learning_rate": 0.00037907238003530736, "loss": 0.0687, "theoretical_loss": 3.4502326474165104, "tokens_seen": 2061500416 }, { "epoch": 0.62, "learning_rate": 0.0003789921360937249, "loss": 0.0693, "theoretical_loss": 3.450196586851474, "tokens_seen": 2061762560 }, { "epoch": 0.62, "learning_rate": 0.0003789118921521425, "loss": 0.069, "theoretical_loss": 3.450160532154685, "tokens_seen": 2062024704 }, { "epoch": 0.62, "learning_rate": 0.0003788316482105601, "loss": 0.0675, "theoretical_loss": 3.4501244833244438, "tokens_seen": 2062286848 }, { "epoch": 0.63, "learning_rate": 0.0003787514042689777, "loss": 0.0687, "theoretical_loss": 3.450088440359049, "tokens_seen": 2062548992 }, { "epoch": 0.63, "learning_rate": 0.0003786711603273953, "loss": 0.0666, "theoretical_loss": 3.450052403256801, "tokens_seen": 2062811136 }, { "epoch": 0.63, "learning_rate": 0.0003785909163858129, "loss": 0.0673, "theoretical_loss": 3.450016372016002, "tokens_seen": 2063073280 }, { "epoch": 0.63, "learning_rate": 0.0003785106724442305, "loss": 0.0694, "theoretical_loss": 3.4499803466349537, "tokens_seen": 2063335424 }, { "epoch": 0.63, "learning_rate": 0.0003784304285026481, "loss": 0.0695, "theoretical_loss": 3.449944327111959, "tokens_seen": 2063597568 }, { "epoch": 0.63, "learning_rate": 0.0003783501845610656, "loss": 0.0678, "theoretical_loss": 3.449908313445321, "tokens_seen": 2063859712 }, { "epoch": 0.63, "learning_rate": 0.0003782699406194832, "loss": 0.0677, "theoretical_loss": 3.4498723056333445, "tokens_seen": 2064121856 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.0011079288087785244, "objective/train/docs_used": 752866, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4321738481521606, "objective/train/original_loss": 1.4321739673614502, "objective/train/theoretical_loss": 3.4498363036743336, "objective/train/tokens_used": 2084844000, "objective/train/value_avg": -0.00701141357421875, "objective/train/value_loss": 0.0003250784066040069, "objective/train/value_max": -4.9114227294921875e-05, "objective/train/value_min": -0.75634765625, "objective/train/value_reward_corr": 0.65196970836296, "objective/train/value_std": 0.01462554931640625, "objective/train/weight_avg": 1.0012527704238892, "objective/train/weighted_lm_loss": 1.43328058719635, "objective/train/weights_max": 1.950271725654602, "objective/train/weights_min": 0.3689524829387665, "theoretical_loss": 3.4498363036743336, "tokens_seen": 2064384000 }, { "epoch": 0.63, "learning_rate": 0.00037818969667790083, "loss": 0.0716, "theoretical_loss": 3.4498363036743336, "tokens_seen": 2064384000 }, { "epoch": 0.63, "learning_rate": 0.00037810945273631844, "loss": 0.0667, "theoretical_loss": 3.449800307566594, "tokens_seen": 2064646144 }, { "epoch": 0.63, "learning_rate": 0.000378029208794736, "loss": 0.0681, "theoretical_loss": 3.449764317308432, "tokens_seen": 2064908288 }, { "epoch": 0.63, "learning_rate": 0.0003779489648531536, "loss": 0.0703, "theoretical_loss": 3.449728332898155, "tokens_seen": 2065170432 }, { "epoch": 0.63, "learning_rate": 0.0003778687209115712, "loss": 0.0662, "theoretical_loss": 3.4496923543340703, "tokens_seen": 2065432576 }, { "epoch": 0.63, "learning_rate": 0.00037778847696998873, "loss": 0.0692, "theoretical_loss": 3.4496563816144867, "tokens_seen": 2065694720 }, { "epoch": 0.63, "learning_rate": 0.00037770823302840634, "loss": 0.0676, "theoretical_loss": 3.449620414737713, "tokens_seen": 2065956864 }, { "epoch": 0.63, "learning_rate": 0.00037762798908682395, "loss": 0.0661, "theoretical_loss": 3.449584453702059, "tokens_seen": 2066219008 }, { "epoch": 0.63, "learning_rate": 0.0003775477451452415, "loss": 0.0675, "theoretical_loss": 3.449548498505834, "tokens_seen": 2066481152 }, { "epoch": 0.63, "learning_rate": 0.0003774675012036591, "loss": 0.0714, "theoretical_loss": 3.4495125491473515, "tokens_seen": 2066743296 }, { "epoch": 0.63, "learning_rate": 0.00037738725726207674, "loss": 0.0674, "theoretical_loss": 3.449476605624922, "tokens_seen": 2067005440 }, { "epoch": 0.63, "learning_rate": 0.00037730701332049435, "loss": 0.0682, "theoretical_loss": 3.4494406679368583, "tokens_seen": 2067267584 }, { "epoch": 0.63, "learning_rate": 0.0003772267693789119, "loss": 0.0696, "theoretical_loss": 3.449404736081474, "tokens_seen": 2067529728 }, { "epoch": 0.63, "learning_rate": 0.00037714652543732947, "loss": 0.0713, "theoretical_loss": 3.4493688100570825, "tokens_seen": 2067791872 }, { "epoch": 0.63, "learning_rate": 0.0003770662814957471, "loss": 0.0702, "theoretical_loss": 3.4493328898619993, "tokens_seen": 2068054016 }, { "epoch": 0.63, "learning_rate": 0.00037698603755416464, "loss": 0.0676, "theoretical_loss": 3.449296975494539, "tokens_seen": 2068316160 }, { "epoch": 0.63, "learning_rate": 0.00037690579361258225, "loss": 0.0667, "theoretical_loss": 3.449261066953018, "tokens_seen": 2068578304 }, { "epoch": 0.63, "learning_rate": 0.00037682554967099986, "loss": 0.0687, "theoretical_loss": 3.4492251642357536, "tokens_seen": 2068840448 }, { "epoch": 0.63, "learning_rate": 0.0003767453057294175, "loss": 0.0695, "theoretical_loss": 3.4491892673410627, "tokens_seen": 2069102592 }, { "epoch": 0.63, "learning_rate": 0.00037666506178783503, "loss": 0.069, "theoretical_loss": 3.449153376267264, "tokens_seen": 2069364736 }, { "epoch": 0.63, "learning_rate": 0.0003765848178462526, "loss": 0.0681, "theoretical_loss": 3.449117491012676, "tokens_seen": 2069626880 }, { "epoch": 0.63, "learning_rate": 0.0003765045739046702, "loss": 0.0674, "theoretical_loss": 3.449081611575618, "tokens_seen": 2069889024 }, { "epoch": 0.63, "learning_rate": 0.00037642432996308776, "loss": 0.0675, "theoretical_loss": 3.4490457379544113, "tokens_seen": 2070151168 }, { "epoch": 0.63, "learning_rate": 0.0003763440860215054, "loss": 0.0706, "theoretical_loss": 3.4490098701473757, "tokens_seen": 2070413312 }, { "epoch": 0.63, "learning_rate": 0.000376263842079923, "loss": 0.0673, "theoretical_loss": 3.448974008152834, "tokens_seen": 2070675456 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.0006079924060031772, "objective/train/docs_used": 755047, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3184467554092407, "objective/train/original_loss": 1.3184466361999512, "objective/train/theoretical_loss": 3.4489381519691085, "objective/train/tokens_used": 2091397600, "objective/train/value_avg": -0.00807952880859375, "objective/train/value_loss": 0.0003275515919085592, "objective/train/value_max": -5.7816505432128906e-05, "objective/train/value_min": -0.6318359375, "objective/train/value_reward_corr": 0.6679204773929055, "objective/train/value_std": 0.0141143798828125, "objective/train/weight_avg": 1.0007538795471191, "objective/train/weighted_lm_loss": 1.3194092512130737, "objective/train/weights_max": 1.8273200988769531, "objective/train/weights_min": 0.3784526288509369, "theoretical_loss": 3.4489381519691085, "tokens_seen": 2070937600 }, { "epoch": 0.63, "learning_rate": 0.0003761835981383406, "loss": 0.0685, "theoretical_loss": 3.4489381519691085, "tokens_seen": 2070937600 }, { "epoch": 0.63, "learning_rate": 0.00037610335419675816, "loss": 0.0707, "theoretical_loss": 3.4489023015945213, "tokens_seen": 2071199744 }, { "epoch": 0.63, "learning_rate": 0.0003760231102551757, "loss": 0.0684, "theoretical_loss": 3.448866457027397, "tokens_seen": 2071461888 }, { "epoch": 0.63, "learning_rate": 0.00037594286631359333, "loss": 0.0686, "theoretical_loss": 3.4488306182660597, "tokens_seen": 2071724032 }, { "epoch": 0.63, "learning_rate": 0.0003758626223720109, "loss": 0.0713, "theoretical_loss": 3.448794785308835, "tokens_seen": 2071986176 }, { "epoch": 0.63, "learning_rate": 0.0003757823784304285, "loss": 0.0691, "theoretical_loss": 3.4487589581540483, "tokens_seen": 2072248320 }, { "epoch": 0.63, "learning_rate": 0.0003757021344888461, "loss": 0.0677, "theoretical_loss": 3.4487231368000266, "tokens_seen": 2072510464 }, { "epoch": 0.63, "learning_rate": 0.0003756218905472637, "loss": 0.0676, "theoretical_loss": 3.448687321245097, "tokens_seen": 2072772608 }, { "epoch": 0.63, "learning_rate": 0.0003755416466056813, "loss": 0.066, "theoretical_loss": 3.4486515114875873, "tokens_seen": 2073034752 }, { "epoch": 0.63, "learning_rate": 0.0003754614026640989, "loss": 0.0704, "theoretical_loss": 3.448615707525826, "tokens_seen": 2073296896 }, { "epoch": 0.63, "learning_rate": 0.00037538115872251646, "loss": 0.0687, "theoretical_loss": 3.448579909358143, "tokens_seen": 2073559040 }, { "epoch": 0.63, "learning_rate": 0.000375300914780934, "loss": 0.0687, "theoretical_loss": 3.448544116982868, "tokens_seen": 2073821184 }, { "epoch": 0.63, "learning_rate": 0.00037522067083935163, "loss": 0.0696, "theoretical_loss": 3.448508330398332, "tokens_seen": 2074083328 }, { "epoch": 0.63, "learning_rate": 0.00037514042689776924, "loss": 0.0672, "theoretical_loss": 3.448472549602866, "tokens_seen": 2074345472 }, { "epoch": 0.63, "learning_rate": 0.0003750601829561868, "loss": 0.0651, "theoretical_loss": 3.4484367745948026, "tokens_seen": 2074607616 }, { "epoch": 0.63, "learning_rate": 0.0003749799390146044, "loss": 0.0698, "theoretical_loss": 3.4484010053724736, "tokens_seen": 2074869760 }, { "epoch": 0.63, "learning_rate": 0.000374899695073022, "loss": 0.0687, "theoretical_loss": 3.448365241934214, "tokens_seen": 2075131904 }, { "epoch": 0.63, "learning_rate": 0.0003748194511314396, "loss": 0.0674, "theoretical_loss": 3.4483294842783563, "tokens_seen": 2075394048 }, { "epoch": 0.63, "learning_rate": 0.00037473920718985714, "loss": 0.0695, "theoretical_loss": 3.4482937324032368, "tokens_seen": 2075656192 }, { "epoch": 0.63, "learning_rate": 0.00037465896324827475, "loss": 0.0709, "theoretical_loss": 3.4482579863071905, "tokens_seen": 2075918336 }, { "epoch": 0.63, "learning_rate": 0.00037457871930669237, "loss": 0.0681, "theoretical_loss": 3.4482222459885534, "tokens_seen": 2076180480 }, { "epoch": 0.63, "learning_rate": 0.0003744984753651099, "loss": 0.0678, "theoretical_loss": 3.4481865114456625, "tokens_seen": 2076442624 }, { "epoch": 0.63, "learning_rate": 0.00037441823142352754, "loss": 0.0671, "theoretical_loss": 3.448150782676856, "tokens_seen": 2076704768 }, { "epoch": 0.63, "learning_rate": 0.00037433798748194515, "loss": 0.0642, "theoretical_loss": 3.4481150596804717, "tokens_seen": 2076966912 }, { "epoch": 0.63, "learning_rate": 0.0003742577435403627, "loss": 0.0668, "theoretical_loss": 3.4480793424548493, "tokens_seen": 2077229056 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.0019641020335257053, "objective/train/docs_used": 757304, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3592127561569214, "objective/train/original_loss": 1.3592126369476318, "objective/train/theoretical_loss": 3.4480436309983276, "objective/train/tokens_used": 2097951200, "objective/train/value_avg": -0.0092926025390625, "objective/train/value_loss": 0.0001754560216795653, "objective/train/value_max": -6.657838821411133e-05, "objective/train/value_min": -0.55322265625, "objective/train/value_reward_corr": 0.7035732028469984, "objective/train/value_std": 0.01471710205078125, "objective/train/weight_avg": 1.0020464658737183, "objective/train/weighted_lm_loss": 1.3608214855194092, "objective/train/weights_max": 1.3589965105056763, "objective/train/weights_min": 0.36952081322669983, "theoretical_loss": 3.4480436309983276, "tokens_seen": 2077491200 }, { "epoch": 0.63, "learning_rate": 0.00037417749959878027, "loss": 0.0667, "theoretical_loss": 3.4480436309983276, "tokens_seen": 2077491200 }, { "epoch": 0.63, "learning_rate": 0.0003740972556571979, "loss": 0.0689, "theoretical_loss": 3.448007925309247, "tokens_seen": 2077753344 }, { "epoch": 0.63, "learning_rate": 0.0003740170117156155, "loss": 0.0695, "theoretical_loss": 3.447972225385949, "tokens_seen": 2078015488 }, { "epoch": 0.63, "learning_rate": 0.00037393676777403305, "loss": 0.0651, "theoretical_loss": 3.447936531226776, "tokens_seen": 2078277632 }, { "epoch": 0.63, "learning_rate": 0.00037385652383245066, "loss": 0.0652, "theoretical_loss": 3.4479008428300686, "tokens_seen": 2078539776 }, { "epoch": 0.63, "learning_rate": 0.0003737762798908683, "loss": 0.0678, "theoretical_loss": 3.447865160194171, "tokens_seen": 2078801920 }, { "epoch": 0.63, "learning_rate": 0.0003736960359492859, "loss": 0.0688, "theoretical_loss": 3.447829483317428, "tokens_seen": 2079064064 }, { "epoch": 0.63, "learning_rate": 0.0003736157920077034, "loss": 0.0656, "theoretical_loss": 3.4477938121981824, "tokens_seen": 2079326208 }, { "epoch": 0.63, "learning_rate": 0.000373535548066121, "loss": 0.0645, "theoretical_loss": 3.44775814683478, "tokens_seen": 2079588352 }, { "epoch": 0.63, "learning_rate": 0.0003734553041245386, "loss": 0.0665, "theoretical_loss": 3.447722487225567, "tokens_seen": 2079850496 }, { "epoch": 0.63, "learning_rate": 0.0003733750601829562, "loss": 0.064, "theoretical_loss": 3.4476868333688904, "tokens_seen": 2080112640 }, { "epoch": 0.63, "learning_rate": 0.0003732948162413738, "loss": 0.0684, "theoretical_loss": 3.447651185263096, "tokens_seen": 2080374784 }, { "epoch": 0.63, "learning_rate": 0.0003732145722997914, "loss": 0.0677, "theoretical_loss": 3.447615542906532, "tokens_seen": 2080636928 }, { "epoch": 0.63, "learning_rate": 0.00037313432835820896, "loss": 0.0693, "theoretical_loss": 3.4475799062975483, "tokens_seen": 2080899072 }, { "epoch": 0.63, "learning_rate": 0.0003730540844166265, "loss": 0.0679, "theoretical_loss": 3.4475442754344927, "tokens_seen": 2081161216 }, { "epoch": 0.63, "learning_rate": 0.00037297384047504413, "loss": 0.0641, "theoretical_loss": 3.447508650315716, "tokens_seen": 2081423360 }, { "epoch": 0.63, "learning_rate": 0.00037289359653346174, "loss": 0.0664, "theoretical_loss": 3.4474730309395687, "tokens_seen": 2081685504 }, { "epoch": 0.63, "learning_rate": 0.0003728133525918793, "loss": 0.0708, "theoretical_loss": 3.4474374173044025, "tokens_seen": 2081947648 }, { "epoch": 0.63, "learning_rate": 0.0003727331086502969, "loss": 0.0684, "theoretical_loss": 3.4474018094085683, "tokens_seen": 2082209792 }, { "epoch": 0.63, "learning_rate": 0.0003726528647087145, "loss": 0.0689, "theoretical_loss": 3.4473662072504196, "tokens_seen": 2082471936 }, { "epoch": 0.63, "learning_rate": 0.0003725726207671321, "loss": 0.0676, "theoretical_loss": 3.4473306108283097, "tokens_seen": 2082734080 }, { "epoch": 0.63, "learning_rate": 0.0003724923768255497, "loss": 0.0677, "theoretical_loss": 3.4472950201405923, "tokens_seen": 2082996224 }, { "epoch": 0.63, "learning_rate": 0.00037241213288396726, "loss": 0.0656, "theoretical_loss": 3.4472594351856225, "tokens_seen": 2083258368 }, { "epoch": 0.63, "learning_rate": 0.00037233188894238487, "loss": 0.0687, "theoretical_loss": 3.447223855961756, "tokens_seen": 2083520512 }, { "epoch": 0.63, "learning_rate": 0.0003722516450008024, "loss": 0.0677, "theoretical_loss": 3.447188282467348, "tokens_seen": 2083782656 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.001002849661745131, "objective/train/docs_used": 759560, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4999350309371948, "objective/train/original_loss": 1.4999351501464844, "objective/train/theoretical_loss": 3.4471527147007555, "objective/train/tokens_used": 2104504800, "objective/train/value_avg": -0.0079498291015625, "objective/train/value_loss": 0.00027177634183317423, "objective/train/value_max": -4.7206878662109375e-05, "objective/train/value_min": -0.74853515625, "objective/train/value_reward_corr": 0.6737963482430739, "objective/train/value_std": 0.016357421875, "objective/train/weight_avg": 1.0011271238327026, "objective/train/weighted_lm_loss": 1.5015414953231812, "objective/train/weights_max": 1.7329145669937134, "objective/train/weights_min": 0.38661831617355347, "theoretical_loss": 3.4471527147007555, "tokens_seen": 2084044800 }, { "epoch": 0.63, "learning_rate": 0.00037217140105922004, "loss": 0.0712, "theoretical_loss": 3.4471527147007555, "tokens_seen": 2084044800 }, { "epoch": 0.63, "learning_rate": 0.00037209115711763765, "loss": 0.0672, "theoretical_loss": 3.447117152660337, "tokens_seen": 2084306944 }, { "epoch": 0.63, "learning_rate": 0.0003720109131760552, "loss": 0.0686, "theoretical_loss": 3.447081596344449, "tokens_seen": 2084569088 }, { "epoch": 0.63, "learning_rate": 0.0003719306692344728, "loss": 0.0678, "theoretical_loss": 3.447046045751451, "tokens_seen": 2084831232 }, { "epoch": 0.63, "learning_rate": 0.0003718504252928904, "loss": 0.066, "theoretical_loss": 3.447010500879703, "tokens_seen": 2085093376 }, { "epoch": 0.63, "learning_rate": 0.000371770181351308, "loss": 0.0648, "theoretical_loss": 3.4469749617275642, "tokens_seen": 2085355520 }, { "epoch": 0.63, "learning_rate": 0.00037168993740972555, "loss": 0.066, "theoretical_loss": 3.4469394282933967, "tokens_seen": 2085617664 }, { "epoch": 0.63, "learning_rate": 0.00037160969346814316, "loss": 0.0699, "theoretical_loss": 3.4469039005755606, "tokens_seen": 2085879808 }, { "epoch": 0.63, "learning_rate": 0.0003715294495265608, "loss": 0.069, "theoretical_loss": 3.446868378572419, "tokens_seen": 2086141952 }, { "epoch": 0.63, "learning_rate": 0.00037144920558497834, "loss": 0.0693, "theoretical_loss": 3.446832862282334, "tokens_seen": 2086404096 }, { "epoch": 0.63, "learning_rate": 0.00037136896164339595, "loss": 0.0656, "theoretical_loss": 3.44679735170367, "tokens_seen": 2086666240 }, { "epoch": 0.63, "learning_rate": 0.0003712887177018135, "loss": 0.0656, "theoretical_loss": 3.4467618468347903, "tokens_seen": 2086928384 }, { "epoch": 0.63, "learning_rate": 0.00037120847376023107, "loss": 0.0667, "theoretical_loss": 3.44672634767406, "tokens_seen": 2087190528 }, { "epoch": 0.63, "learning_rate": 0.0003711282298186487, "loss": 0.0668, "theoretical_loss": 3.4466908542198453, "tokens_seen": 2087452672 }, { "epoch": 0.63, "learning_rate": 0.0003710479858770663, "loss": 0.0679, "theoretical_loss": 3.4466553664705124, "tokens_seen": 2087714816 }, { "epoch": 0.63, "learning_rate": 0.0003709677419354839, "loss": 0.0669, "theoretical_loss": 3.446619884424427, "tokens_seen": 2087976960 }, { "epoch": 0.63, "learning_rate": 0.00037088749799390146, "loss": 0.0678, "theoretical_loss": 3.446584408079958, "tokens_seen": 2088239104 }, { "epoch": 0.63, "learning_rate": 0.0003708072540523191, "loss": 0.0692, "theoretical_loss": 3.4465489374354727, "tokens_seen": 2088501248 }, { "epoch": 0.63, "learning_rate": 0.0003707270101107367, "loss": 0.0687, "theoretical_loss": 3.4465134724893405, "tokens_seen": 2088763392 }, { "epoch": 0.63, "learning_rate": 0.0003706467661691542, "loss": 0.0681, "theoretical_loss": 3.446478013239931, "tokens_seen": 2089025536 }, { "epoch": 0.63, "learning_rate": 0.0003705665222275718, "loss": 0.0694, "theoretical_loss": 3.4464425596856136, "tokens_seen": 2089287680 }, { "epoch": 0.63, "learning_rate": 0.0003704862782859894, "loss": 0.0691, "theoretical_loss": 3.446407111824761, "tokens_seen": 2089549824 }, { "epoch": 0.63, "learning_rate": 0.00037040603434440703, "loss": 0.0684, "theoretical_loss": 3.4463716696557425, "tokens_seen": 2089811968 }, { "epoch": 0.63, "learning_rate": 0.0003703257904028246, "loss": 0.0675, "theoretical_loss": 3.4463362331769325, "tokens_seen": 2090074112 }, { "epoch": 0.63, "learning_rate": 0.0003702455464612422, "loss": 0.0671, "theoretical_loss": 3.4463008023867028, "tokens_seen": 2090336256 }, { "epoch": 0.63, "objective/train/advantage_avg": -0.00044457524199970067, "objective/train/docs_used": 762048, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4181301593780518, "objective/train/original_loss": 1.4181302785873413, "objective/train/theoretical_loss": 3.4462653772834266, "objective/train/tokens_used": 2111058400, "objective/train/value_avg": -0.00865936279296875, "objective/train/value_loss": 0.0003374247462488711, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.91943359375, "objective/train/value_reward_corr": 0.747619425608935, "objective/train/value_std": 0.0189666748046875, "objective/train/weight_avg": 0.9997140765190125, "objective/train/weighted_lm_loss": 1.4173901081085205, "objective/train/weights_max": 1.5935001373291016, "objective/train/weights_min": 0.3686169385910034, "theoretical_loss": 3.4462653772834266, "tokens_seen": 2090598400 }, { "epoch": 0.63, "learning_rate": 0.0003701653025196598, "loss": 0.0694, "theoretical_loss": 3.4462653772834266, "tokens_seen": 2090598400 }, { "epoch": 0.63, "learning_rate": 0.0003700850585780773, "loss": 0.0679, "theoretical_loss": 3.446229957865479, "tokens_seen": 2090860544 }, { "epoch": 0.63, "learning_rate": 0.00037000481463649493, "loss": 0.069, "theoretical_loss": 3.4461945441312354, "tokens_seen": 2091122688 }, { "epoch": 0.63, "learning_rate": 0.00036992457069491254, "loss": 0.067, "theoretical_loss": 3.4461591360790704, "tokens_seen": 2091384832 }, { "epoch": 0.63, "learning_rate": 0.00036984432675333015, "loss": 0.0657, "theoretical_loss": 3.44612373370736, "tokens_seen": 2091646976 }, { "epoch": 0.63, "learning_rate": 0.0003697640828117477, "loss": 0.0687, "theoretical_loss": 3.446088337014482, "tokens_seen": 2091909120 }, { "epoch": 0.63, "learning_rate": 0.0003696838388701653, "loss": 0.0684, "theoretical_loss": 3.446052945998814, "tokens_seen": 2092171264 }, { "epoch": 0.63, "learning_rate": 0.00036960359492858294, "loss": 0.068, "theoretical_loss": 3.446017560658734, "tokens_seen": 2092433408 }, { "epoch": 0.63, "learning_rate": 0.00036952335098700044, "loss": 0.0673, "theoretical_loss": 3.4459821809926208, "tokens_seen": 2092695552 }, { "epoch": 0.63, "learning_rate": 0.00036944310704541805, "loss": 0.0667, "theoretical_loss": 3.4459468069988537, "tokens_seen": 2092957696 }, { "epoch": 0.63, "learning_rate": 0.00036936286310383567, "loss": 0.0666, "theoretical_loss": 3.445911438675814, "tokens_seen": 2093219840 }, { "epoch": 0.63, "learning_rate": 0.0003692826191622532, "loss": 0.0638, "theoretical_loss": 3.445876076021882, "tokens_seen": 2093481984 }, { "epoch": 0.63, "learning_rate": 0.00036920237522067084, "loss": 0.0683, "theoretical_loss": 3.4458407190354388, "tokens_seen": 2093744128 }, { "epoch": 0.63, "learning_rate": 0.00036912213127908845, "loss": 0.0666, "theoretical_loss": 3.445805367714868, "tokens_seen": 2094006272 }, { "epoch": 0.63, "learning_rate": 0.00036904188733750606, "loss": 0.0662, "theoretical_loss": 3.445770022058551, "tokens_seen": 2094268416 }, { "epoch": 0.63, "learning_rate": 0.0003689616433959236, "loss": 0.0678, "theoretical_loss": 3.445734682064873, "tokens_seen": 2094530560 }, { "epoch": 0.63, "learning_rate": 0.0003688813994543412, "loss": 0.0664, "theoretical_loss": 3.4456993477322166, "tokens_seen": 2094792704 }, { "epoch": 0.63, "learning_rate": 0.0003688011555127588, "loss": 0.0667, "theoretical_loss": 3.4456640190589676, "tokens_seen": 2095054848 }, { "epoch": 0.63, "learning_rate": 0.00036872091157117635, "loss": 0.0685, "theoretical_loss": 3.445628696043512, "tokens_seen": 2095316992 }, { "epoch": 0.64, "learning_rate": 0.00036864066762959396, "loss": 0.0664, "theoretical_loss": 3.4455933786842348, "tokens_seen": 2095579136 }, { "epoch": 0.64, "learning_rate": 0.0003685604236880116, "loss": 0.0699, "theoretical_loss": 3.4455580669795243, "tokens_seen": 2095841280 }, { "epoch": 0.64, "learning_rate": 0.0003684801797464292, "loss": 0.0666, "theoretical_loss": 3.4455227609277674, "tokens_seen": 2096103424 }, { "epoch": 0.64, "learning_rate": 0.00036839993580484675, "loss": 0.0709, "theoretical_loss": 3.445487460527352, "tokens_seen": 2096365568 }, { "epoch": 0.64, "learning_rate": 0.0003683196918632643, "loss": 0.0693, "theoretical_loss": 3.4454521657766675, "tokens_seen": 2096627712 }, { "epoch": 0.64, "learning_rate": 0.0003682394479216819, "loss": 0.0698, "theoretical_loss": 3.445416876674104, "tokens_seen": 2096889856 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.0006668668356724083, "objective/train/docs_used": 764413, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4101420640945435, "objective/train/original_loss": 1.4101420640945435, "objective/train/theoretical_loss": 3.44538159321805, "objective/train/tokens_used": 2117612000, "objective/train/value_avg": -0.00644683837890625, "objective/train/value_loss": 0.0001926971017383039, "objective/train/value_max": -4.2319297790527344e-05, "objective/train/value_min": -0.8056640625, "objective/train/value_reward_corr": 0.6798013959111557, "objective/train/value_std": 0.0125732421875, "objective/train/weight_avg": 1.00075101852417, "objective/train/weighted_lm_loss": 1.4113109111785889, "objective/train/weights_max": 1.2963095903396606, "objective/train/weights_min": 0.22838670015335083, "theoretical_loss": 3.44538159321805, "tokens_seen": 2097152000 }, { "epoch": 0.64, "learning_rate": 0.0003681592039800995, "loss": 0.0677, "theoretical_loss": 3.44538159321805, "tokens_seen": 2097152000 }, { "epoch": 0.64, "learning_rate": 0.0003680789600385171, "loss": 0.0676, "theoretical_loss": 3.4453463154068977, "tokens_seen": 2097414144 }, { "epoch": 0.64, "learning_rate": 0.0003679987160969347, "loss": 0.0691, "theoretical_loss": 3.4453110432390384, "tokens_seen": 2097676288 }, { "epoch": 0.64, "learning_rate": 0.0003679184721553523, "loss": 0.0683, "theoretical_loss": 3.4452757767128643, "tokens_seen": 2097938432 }, { "epoch": 0.64, "learning_rate": 0.0003678382282137699, "loss": 0.0644, "theoretical_loss": 3.445240515826768, "tokens_seen": 2098200576 }, { "epoch": 0.64, "learning_rate": 0.0003677579842721875, "loss": 0.0694, "theoretical_loss": 3.4452052605791432, "tokens_seen": 2098462720 }, { "epoch": 0.64, "learning_rate": 0.00036767774033060504, "loss": 0.0672, "theoretical_loss": 3.4451700109683836, "tokens_seen": 2098724864 }, { "epoch": 0.64, "learning_rate": 0.0003675974963890226, "loss": 0.0685, "theoretical_loss": 3.4451347669928856, "tokens_seen": 2098987008 }, { "epoch": 0.64, "learning_rate": 0.0003675172524474402, "loss": 0.0691, "theoretical_loss": 3.4450995286510424, "tokens_seen": 2099249152 }, { "epoch": 0.64, "learning_rate": 0.00036743700850585783, "loss": 0.0689, "theoretical_loss": 3.445064295941252, "tokens_seen": 2099511296 }, { "epoch": 0.64, "learning_rate": 0.00036735676456427544, "loss": 0.0667, "theoretical_loss": 3.4450290688619103, "tokens_seen": 2099773440 }, { "epoch": 0.64, "learning_rate": 0.000367276520622693, "loss": 0.0637, "theoretical_loss": 3.444993847411415, "tokens_seen": 2100035584 }, { "epoch": 0.64, "learning_rate": 0.0003671962766811106, "loss": 0.0689, "theoretical_loss": 3.4449586315881637, "tokens_seen": 2100297728 }, { "epoch": 0.64, "learning_rate": 0.00036711603273952817, "loss": 0.065, "theoretical_loss": 3.4449234213905564, "tokens_seen": 2100559872 }, { "epoch": 0.64, "learning_rate": 0.00036703578879794573, "loss": 0.0652, "theoretical_loss": 3.4448882168169908, "tokens_seen": 2100822016 }, { "epoch": 0.64, "learning_rate": 0.00036695554485636334, "loss": 0.0654, "theoretical_loss": 3.444853017865869, "tokens_seen": 2101084160 }, { "epoch": 0.64, "learning_rate": 0.00036687530091478095, "loss": 0.0646, "theoretical_loss": 3.4448178245355896, "tokens_seen": 2101346304 }, { "epoch": 0.64, "learning_rate": 0.0003667950569731985, "loss": 0.0657, "theoretical_loss": 3.4447826368245558, "tokens_seen": 2101608448 }, { "epoch": 0.64, "learning_rate": 0.0003667148130316161, "loss": 0.0659, "theoretical_loss": 3.4447474547311683, "tokens_seen": 2101870592 }, { "epoch": 0.64, "learning_rate": 0.00036663456909003374, "loss": 0.0669, "theoretical_loss": 3.4447122782538306, "tokens_seen": 2102132736 }, { "epoch": 0.64, "learning_rate": 0.0003665543251484513, "loss": 0.0655, "theoretical_loss": 3.444677107390946, "tokens_seen": 2102394880 }, { "epoch": 0.64, "learning_rate": 0.00036647408120686885, "loss": 0.0644, "theoretical_loss": 3.444641942140918, "tokens_seen": 2102657024 }, { "epoch": 0.64, "learning_rate": 0.00036639383726528647, "loss": 0.0662, "theoretical_loss": 3.4446067825021514, "tokens_seen": 2102919168 }, { "epoch": 0.64, "learning_rate": 0.0003663135933237041, "loss": 0.0657, "theoretical_loss": 3.444571628473052, "tokens_seen": 2103181312 }, { "epoch": 0.64, "learning_rate": 0.00036623334938212164, "loss": 0.0662, "theoretical_loss": 3.4445364800520255, "tokens_seen": 2103443456 }, { "epoch": 0.64, "objective/train/advantage_avg": -0.0007185638532973826, "objective/train/docs_used": 766743, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4546356201171875, "objective/train/original_loss": 1.4546356201171875, "objective/train/theoretical_loss": 3.444501337237478, "objective/train/tokens_used": 2124165600, "objective/train/value_avg": -0.0100555419921875, "objective/train/value_loss": 0.00047717380221001804, "objective/train/value_max": -3.218650817871094e-05, "objective/train/value_min": -0.8232421875, "objective/train/value_reward_corr": 0.7003188218258403, "objective/train/value_std": 0.0203704833984375, "objective/train/weight_avg": 0.999488353729248, "objective/train/weighted_lm_loss": 1.453125238418579, "objective/train/weights_max": 1.6461470127105713, "objective/train/weights_min": 0.23570120334625244, "theoretical_loss": 3.444501337237478, "tokens_seen": 2103705600 }, { "epoch": 0.64, "learning_rate": 0.00036615310544053925, "loss": 0.0694, "theoretical_loss": 3.444501337237478, "tokens_seen": 2103705600 }, { "epoch": 0.64, "learning_rate": 0.00036607286149895686, "loss": 0.0668, "theoretical_loss": 3.4444662000278177, "tokens_seen": 2103967744 }, { "epoch": 0.64, "learning_rate": 0.0003659926175573745, "loss": 0.0627, "theoretical_loss": 3.4444310684214514, "tokens_seen": 2104229888 }, { "epoch": 0.64, "learning_rate": 0.000365912373615792, "loss": 0.064, "theoretical_loss": 3.444395942416789, "tokens_seen": 2104492032 }, { "epoch": 0.64, "learning_rate": 0.0003658321296742096, "loss": 0.0654, "theoretical_loss": 3.4443608220122384, "tokens_seen": 2104754176 }, { "epoch": 0.64, "learning_rate": 0.0003657518857326272, "loss": 0.0668, "theoretical_loss": 3.44432570720621, "tokens_seen": 2105016320 }, { "epoch": 0.64, "learning_rate": 0.00036567164179104476, "loss": 0.0623, "theoretical_loss": 3.4442905979971146, "tokens_seen": 2105278464 }, { "epoch": 0.64, "learning_rate": 0.0003655913978494624, "loss": 0.0672, "theoretical_loss": 3.4442554943833628, "tokens_seen": 2105540608 }, { "epoch": 0.64, "learning_rate": 0.00036551115390788, "loss": 0.0673, "theoretical_loss": 3.444220396363367, "tokens_seen": 2105802752 }, { "epoch": 0.64, "learning_rate": 0.0003654309099662976, "loss": 0.0642, "theoretical_loss": 3.444185303935539, "tokens_seen": 2106064896 }, { "epoch": 0.64, "learning_rate": 0.0003653506660247151, "loss": 0.0659, "theoretical_loss": 3.4441502170982927, "tokens_seen": 2106327040 }, { "epoch": 0.64, "learning_rate": 0.0003652704220831327, "loss": 0.0689, "theoretical_loss": 3.444115135850041, "tokens_seen": 2106589184 }, { "epoch": 0.64, "learning_rate": 0.00036519017814155033, "loss": 0.0686, "theoretical_loss": 3.444080060189199, "tokens_seen": 2106851328 }, { "epoch": 0.64, "learning_rate": 0.0003651099341999679, "loss": 0.0692, "theoretical_loss": 3.444044990114181, "tokens_seen": 2107113472 }, { "epoch": 0.64, "learning_rate": 0.0003650296902583855, "loss": 0.0677, "theoretical_loss": 3.4440099256234036, "tokens_seen": 2107375616 }, { "epoch": 0.64, "learning_rate": 0.0003649494463168031, "loss": 0.0689, "theoretical_loss": 3.4439748667152825, "tokens_seen": 2107637760 }, { "epoch": 0.64, "learning_rate": 0.00036486920237522067, "loss": 0.0667, "theoretical_loss": 3.443939813388235, "tokens_seen": 2107899904 }, { "epoch": 0.64, "learning_rate": 0.00036478895843363823, "loss": 0.0659, "theoretical_loss": 3.4439047656406783, "tokens_seen": 2108162048 }, { "epoch": 0.64, "learning_rate": 0.00036470871449205584, "loss": 0.0693, "theoretical_loss": 3.443869723471031, "tokens_seen": 2108424192 }, { "epoch": 0.64, "learning_rate": 0.00036462847055047346, "loss": 0.067, "theoretical_loss": 3.443834686877712, "tokens_seen": 2108686336 }, { "epoch": 0.64, "learning_rate": 0.000364548226608891, "loss": 0.0675, "theoretical_loss": 3.443799655859141, "tokens_seen": 2108948480 }, { "epoch": 0.64, "learning_rate": 0.00036446798266730863, "loss": 0.0675, "theoretical_loss": 3.443764630413738, "tokens_seen": 2109210624 }, { "epoch": 0.64, "learning_rate": 0.00036438773872572624, "loss": 0.0656, "theoretical_loss": 3.4437296105399238, "tokens_seen": 2109472768 }, { "epoch": 0.64, "learning_rate": 0.0003643074947841438, "loss": 0.0693, "theoretical_loss": 3.4436945962361203, "tokens_seen": 2109734912 }, { "epoch": 0.64, "learning_rate": 0.0003642272508425614, "loss": 0.0665, "theoretical_loss": 3.4436595875007487, "tokens_seen": 2109997056 }, { "epoch": 0.64, "objective/train/advantage_avg": -6.150845001684502e-05, "objective/train/docs_used": 769058, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.341748595237732, "objective/train/original_loss": 1.341748595237732, "objective/train/theoretical_loss": 3.443624584332233, "objective/train/tokens_used": 2130719200, "objective/train/value_avg": -0.0086822509765625, "objective/train/value_loss": 0.00023672039969824255, "objective/train/value_max": -4.398822784423828e-05, "objective/train/value_min": -0.70703125, "objective/train/value_reward_corr": 0.7215973019732559, "objective/train/value_std": 0.0160980224609375, "objective/train/weight_avg": 1.0000429153442383, "objective/train/weighted_lm_loss": 1.3412035703659058, "objective/train/weights_max": 1.2542415857315063, "objective/train/weights_min": 0.3687976598739624, "theoretical_loss": 3.443624584332233, "tokens_seen": 2110259200 }, { "epoch": 0.64, "learning_rate": 0.00036414700690097897, "loss": 0.0673, "theoretical_loss": 3.443624584332233, "tokens_seen": 2110259200 }, { "epoch": 0.64, "learning_rate": 0.0003640667629593966, "loss": 0.0678, "theoretical_loss": 3.4435895867289963, "tokens_seen": 2110521344 }, { "epoch": 0.64, "learning_rate": 0.00036398651901781414, "loss": 0.0668, "theoretical_loss": 3.443554594689462, "tokens_seen": 2110783488 }, { "epoch": 0.64, "learning_rate": 0.00036390627507623175, "loss": 0.0638, "theoretical_loss": 3.443519608212055, "tokens_seen": 2111045632 }, { "epoch": 0.64, "learning_rate": 0.00036382603113464937, "loss": 0.0638, "theoretical_loss": 3.4434846272952013, "tokens_seen": 2111307776 }, { "epoch": 0.64, "learning_rate": 0.0003637457871930669, "loss": 0.0669, "theoretical_loss": 3.4434496519373265, "tokens_seen": 2111569920 }, { "epoch": 0.64, "learning_rate": 0.00036366554325148454, "loss": 0.0684, "theoretical_loss": 3.443414682136857, "tokens_seen": 2111832064 }, { "epoch": 0.64, "learning_rate": 0.0003635852993099021, "loss": 0.0664, "theoretical_loss": 3.4433797178922205, "tokens_seen": 2112094208 }, { "epoch": 0.64, "learning_rate": 0.0003635050553683197, "loss": 0.0646, "theoretical_loss": 3.4433447592018447, "tokens_seen": 2112356352 }, { "epoch": 0.64, "learning_rate": 0.00036342481142673727, "loss": 0.0673, "theoretical_loss": 3.443309806064158, "tokens_seen": 2112618496 }, { "epoch": 0.64, "learning_rate": 0.0003633445674851549, "loss": 0.0663, "theoretical_loss": 3.44327485847759, "tokens_seen": 2112880640 }, { "epoch": 0.64, "learning_rate": 0.0003632643235435725, "loss": 0.0669, "theoretical_loss": 3.44323991644057, "tokens_seen": 2113142784 }, { "epoch": 0.64, "learning_rate": 0.00036318407960199005, "loss": 0.065, "theoretical_loss": 3.443204979951529, "tokens_seen": 2113404928 }, { "epoch": 0.64, "learning_rate": 0.00036310383566040766, "loss": 0.0642, "theoretical_loss": 3.4431700490088977, "tokens_seen": 2113667072 }, { "epoch": 0.64, "learning_rate": 0.0003630235917188253, "loss": 0.0675, "theoretical_loss": 3.443135123611108, "tokens_seen": 2113929216 }, { "epoch": 0.64, "learning_rate": 0.0003629433477772428, "loss": 0.0669, "theoretical_loss": 3.4431002037565923, "tokens_seen": 2114191360 }, { "epoch": 0.64, "learning_rate": 0.0003628631038356604, "loss": 0.0657, "theoretical_loss": 3.4430652894437834, "tokens_seen": 2114453504 }, { "epoch": 0.64, "learning_rate": 0.000362782859894078, "loss": 0.0698, "theoretical_loss": 3.4430303806711158, "tokens_seen": 2114715648 }, { "epoch": 0.64, "learning_rate": 0.0003627026159524956, "loss": 0.0678, "theoretical_loss": 3.442995477437023, "tokens_seen": 2114977792 }, { "epoch": 0.64, "learning_rate": 0.0003626223720109132, "loss": 0.066, "theoretical_loss": 3.44296057973994, "tokens_seen": 2115239936 }, { "epoch": 0.64, "learning_rate": 0.0003625421280693308, "loss": 0.067, "theoretical_loss": 3.442925687578302, "tokens_seen": 2115502080 }, { "epoch": 0.64, "learning_rate": 0.0003624618841277484, "loss": 0.0674, "theoretical_loss": 3.442890800950546, "tokens_seen": 2115764224 }, { "epoch": 0.64, "learning_rate": 0.0003623816401861659, "loss": 0.0637, "theoretical_loss": 3.442855919855109, "tokens_seen": 2116026368 }, { "epoch": 0.64, "learning_rate": 0.0003623013962445835, "loss": 0.0638, "theoretical_loss": 3.4428210442904277, "tokens_seen": 2116288512 }, { "epoch": 0.64, "learning_rate": 0.00036222115230300113, "loss": 0.0654, "theoretical_loss": 3.4427861742549406, "tokens_seen": 2116550656 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.0007084216340444982, "objective/train/docs_used": 771474, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3125371932983398, "objective/train/original_loss": 1.3125371932983398, "objective/train/theoretical_loss": 3.442751309747086, "objective/train/tokens_used": 2137272800, "objective/train/value_avg": -0.007656097412109375, "objective/train/value_loss": 0.0002038901875494048, "objective/train/value_max": -5.4776668548583984e-05, "objective/train/value_min": -0.394287109375, "objective/train/value_reward_corr": 0.7548622300814178, "objective/train/value_std": 0.0170745849609375, "objective/train/weight_avg": 1.000800609588623, "objective/train/weighted_lm_loss": 1.3134653568267822, "objective/train/weights_max": 1.2419233322143555, "objective/train/weights_min": 0.38047337532043457, "theoretical_loss": 3.442751309747086, "tokens_seen": 2116812800 }, { "epoch": 0.64, "learning_rate": 0.00036214090836141874, "loss": 0.0648, "theoretical_loss": 3.442751309747086, "tokens_seen": 2116812800 }, { "epoch": 0.64, "learning_rate": 0.0003620606644198363, "loss": 0.0664, "theoretical_loss": 3.442716450765304, "tokens_seen": 2117074944 }, { "epoch": 0.64, "learning_rate": 0.0003619804204782539, "loss": 0.0669, "theoretical_loss": 3.4426815973080345, "tokens_seen": 2117337088 }, { "epoch": 0.64, "learning_rate": 0.0003619001765366715, "loss": 0.0686, "theoretical_loss": 3.4426467493737176, "tokens_seen": 2117599232 }, { "epoch": 0.64, "learning_rate": 0.00036181993259508903, "loss": 0.0664, "theoretical_loss": 3.4426119069607948, "tokens_seen": 2117861376 }, { "epoch": 0.64, "learning_rate": 0.00036173968865350664, "loss": 0.0676, "theoretical_loss": 3.4425770700677085, "tokens_seen": 2118123520 }, { "epoch": 0.64, "learning_rate": 0.00036165944471192426, "loss": 0.0644, "theoretical_loss": 3.4425422386929005, "tokens_seen": 2118385664 }, { "epoch": 0.64, "learning_rate": 0.00036157920077034187, "loss": 0.0656, "theoretical_loss": 3.4425074128348148, "tokens_seen": 2118647808 }, { "epoch": 0.64, "learning_rate": 0.0003614989568287594, "loss": 0.0665, "theoretical_loss": 3.442472592491894, "tokens_seen": 2118909952 }, { "epoch": 0.64, "learning_rate": 0.00036141871288717704, "loss": 0.0671, "theoretical_loss": 3.442437777662584, "tokens_seen": 2119172096 }, { "epoch": 0.64, "learning_rate": 0.00036133846894559465, "loss": 0.0679, "theoretical_loss": 3.4424029683453288, "tokens_seen": 2119434240 }, { "epoch": 0.64, "learning_rate": 0.0003612582250040122, "loss": 0.0683, "theoretical_loss": 3.442368164538575, "tokens_seen": 2119696384 }, { "epoch": 0.64, "learning_rate": 0.00036117798106242977, "loss": 0.0655, "theoretical_loss": 3.4423333662407676, "tokens_seen": 2119958528 }, { "epoch": 0.64, "learning_rate": 0.0003610977371208474, "loss": 0.0673, "theoretical_loss": 3.442298573450355, "tokens_seen": 2120220672 }, { "epoch": 0.64, "learning_rate": 0.00036101749317926494, "loss": 0.0661, "theoretical_loss": 3.4422637861657837, "tokens_seen": 2120482816 }, { "epoch": 0.64, "learning_rate": 0.00036093724923768255, "loss": 0.066, "theoretical_loss": 3.442229004385502, "tokens_seen": 2120744960 }, { "epoch": 0.64, "learning_rate": 0.00036085700529610017, "loss": 0.0661, "theoretical_loss": 3.44219422810796, "tokens_seen": 2121007104 }, { "epoch": 0.64, "learning_rate": 0.0003607767613545178, "loss": 0.0646, "theoretical_loss": 3.4421594573316057, "tokens_seen": 2121269248 }, { "epoch": 0.64, "learning_rate": 0.00036069651741293534, "loss": 0.0672, "theoretical_loss": 3.44212469205489, "tokens_seen": 2121531392 }, { "epoch": 0.64, "learning_rate": 0.0003606162734713529, "loss": 0.0693, "theoretical_loss": 3.4420899322762635, "tokens_seen": 2121793536 }, { "epoch": 0.64, "learning_rate": 0.0003605360295297705, "loss": 0.0682, "theoretical_loss": 3.4420551779941775, "tokens_seen": 2122055680 }, { "epoch": 0.64, "learning_rate": 0.00036045578558818807, "loss": 0.0676, "theoretical_loss": 3.4420204292070844, "tokens_seen": 2122317824 }, { "epoch": 0.64, "learning_rate": 0.0003603755416466057, "loss": 0.066, "theoretical_loss": 3.4419856859134357, "tokens_seen": 2122579968 }, { "epoch": 0.64, "learning_rate": 0.0003602952977050233, "loss": 0.0664, "theoretical_loss": 3.4419509481116854, "tokens_seen": 2122842112 }, { "epoch": 0.64, "learning_rate": 0.0003602150537634409, "loss": 0.0691, "theoretical_loss": 3.441916215800288, "tokens_seen": 2123104256 }, { "epoch": 0.64, "objective/train/advantage_avg": -0.0004949113936163485, "objective/train/docs_used": 773937, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2716563940048218, "objective/train/original_loss": 1.2716562747955322, "objective/train/theoretical_loss": 3.441881488977697, "objective/train/tokens_used": 2143826400, "objective/train/value_avg": -0.01212310791015625, "objective/train/value_loss": 0.0005141652654856443, "objective/train/value_max": -2.1278858184814453e-05, "objective/train/value_min": -0.85888671875, "objective/train/value_reward_corr": 0.7393573499560793, "objective/train/value_std": 0.0230712890625, "objective/train/weight_avg": 0.9997339248657227, "objective/train/weighted_lm_loss": 1.2710819244384766, "objective/train/weights_max": 1.5846818685531616, "objective/train/weights_min": 0.3693798780441284, "theoretical_loss": 3.441881488977697, "tokens_seen": 2123366400 }, { "epoch": 0.64, "learning_rate": 0.00036013480982185846, "loss": 0.0636, "theoretical_loss": 3.441881488977697, "tokens_seen": 2123366400 }, { "epoch": 0.64, "learning_rate": 0.000360054565880276, "loss": 0.0633, "theoretical_loss": 3.441846767642368, "tokens_seen": 2123628544 }, { "epoch": 0.64, "learning_rate": 0.00035997432193869363, "loss": 0.0662, "theoretical_loss": 3.4418120517927564, "tokens_seen": 2123890688 }, { "epoch": 0.64, "learning_rate": 0.0003598940779971112, "loss": 0.0687, "theoretical_loss": 3.441777341427319, "tokens_seen": 2124152832 }, { "epoch": 0.64, "learning_rate": 0.0003598138340555288, "loss": 0.0703, "theoretical_loss": 3.4417426365445127, "tokens_seen": 2124414976 }, { "epoch": 0.64, "learning_rate": 0.0003597335901139464, "loss": 0.0644, "theoretical_loss": 3.4417079371427945, "tokens_seen": 2124677120 }, { "epoch": 0.64, "learning_rate": 0.00035965334617236403, "loss": 0.0666, "theoretical_loss": 3.441673243220624, "tokens_seen": 2124939264 }, { "epoch": 0.64, "learning_rate": 0.0003595731022307816, "loss": 0.0655, "theoretical_loss": 3.4416385547764583, "tokens_seen": 2125201408 }, { "epoch": 0.64, "learning_rate": 0.0003594928582891992, "loss": 0.0699, "theoretical_loss": 3.4416038718087583, "tokens_seen": 2125463552 }, { "epoch": 0.64, "learning_rate": 0.00035941261434761676, "loss": 0.0671, "theoretical_loss": 3.4415691943159836, "tokens_seen": 2125725696 }, { "epoch": 0.64, "learning_rate": 0.0003593323704060343, "loss": 0.0672, "theoretical_loss": 3.441534522296595, "tokens_seen": 2125987840 }, { "epoch": 0.64, "learning_rate": 0.00035925212646445193, "loss": 0.0643, "theoretical_loss": 3.441499855749054, "tokens_seen": 2126249984 }, { "epoch": 0.64, "learning_rate": 0.00035917188252286954, "loss": 0.0649, "theoretical_loss": 3.441465194671822, "tokens_seen": 2126512128 }, { "epoch": 0.64, "learning_rate": 0.00035909163858128715, "loss": 0.0665, "theoretical_loss": 3.4414305390633624, "tokens_seen": 2126774272 }, { "epoch": 0.64, "learning_rate": 0.0003590113946397047, "loss": 0.0649, "theoretical_loss": 3.4413958889221385, "tokens_seen": 2127036416 }, { "epoch": 0.64, "learning_rate": 0.0003589311506981223, "loss": 0.0653, "theoretical_loss": 3.4413612442466133, "tokens_seen": 2127298560 }, { "epoch": 0.64, "learning_rate": 0.0003588509067565399, "loss": 0.0668, "theoretical_loss": 3.441326605035252, "tokens_seen": 2127560704 }, { "epoch": 0.64, "learning_rate": 0.00035877066281495744, "loss": 0.0671, "theoretical_loss": 3.441291971286519, "tokens_seen": 2127822848 }, { "epoch": 0.64, "learning_rate": 0.00035869041887337506, "loss": 0.0643, "theoretical_loss": 3.44125734299888, "tokens_seen": 2128084992 }, { "epoch": 0.64, "learning_rate": 0.00035861017493179267, "loss": 0.068, "theoretical_loss": 3.4412227201708028, "tokens_seen": 2128347136 }, { "epoch": 0.65, "learning_rate": 0.0003585299309902102, "loss": 0.0681, "theoretical_loss": 3.4411881028007527, "tokens_seen": 2128609280 }, { "epoch": 0.65, "learning_rate": 0.00035844968704862784, "loss": 0.0647, "theoretical_loss": 3.4411534908871984, "tokens_seen": 2128871424 }, { "epoch": 0.65, "learning_rate": 0.00035836944310704545, "loss": 0.068, "theoretical_loss": 3.4411188844286067, "tokens_seen": 2129133568 }, { "epoch": 0.65, "learning_rate": 0.00035828919916546306, "loss": 0.0677, "theoretical_loss": 3.441084283423448, "tokens_seen": 2129395712 }, { "epoch": 0.65, "learning_rate": 0.00035820895522388057, "loss": 0.0692, "theoretical_loss": 3.4410496878701906, "tokens_seen": 2129657856 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.00014313262363430113, "objective/train/docs_used": 775953, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3686505556106567, "objective/train/original_loss": 1.3686505556106567, "objective/train/theoretical_loss": 3.441015097767305, "objective/train/tokens_used": 2150380000, "objective/train/value_avg": -0.00724029541015625, "objective/train/value_loss": 0.00016575136396568269, "objective/train/value_max": -6.866455078125e-05, "objective/train/value_min": -0.2313232421875, "objective/train/value_reward_corr": 0.7221669709099741, "objective/train/value_std": 0.01239776611328125, "objective/train/weight_avg": 1.0002208948135376, "objective/train/weighted_lm_loss": 1.3682907819747925, "objective/train/weights_max": 1.2552000284194946, "objective/train/weights_min": 0.3698423504829407, "theoretical_loss": 3.441015097767305, "tokens_seen": 2129920000 }, { "epoch": 0.65, "learning_rate": 0.0003581287112822982, "loss": 0.0652, "theoretical_loss": 3.441015097767305, "tokens_seen": 2129920000 }, { "epoch": 0.65, "learning_rate": 0.0003580484673407158, "loss": 0.069, "theoretical_loss": 3.440980513113262, "tokens_seen": 2130182144 }, { "epoch": 0.65, "learning_rate": 0.00035796822339913335, "loss": 0.0679, "theoretical_loss": 3.4409459339065327, "tokens_seen": 2130444288 }, { "epoch": 0.65, "learning_rate": 0.00035788797945755096, "loss": 0.0705, "theoretical_loss": 3.4409113601455887, "tokens_seen": 2130706432 }, { "epoch": 0.65, "learning_rate": 0.0003578077355159686, "loss": 0.0665, "theoretical_loss": 3.4408767918289027, "tokens_seen": 2130968576 }, { "epoch": 0.65, "learning_rate": 0.0003577274915743862, "loss": 0.0666, "theoretical_loss": 3.4408422289549483, "tokens_seen": 2131230720 }, { "epoch": 0.65, "learning_rate": 0.0003576472476328037, "loss": 0.0657, "theoretical_loss": 3.4408076715221982, "tokens_seen": 2131492864 }, { "epoch": 0.65, "learning_rate": 0.0003575670036912213, "loss": 0.0659, "theoretical_loss": 3.440773119529128, "tokens_seen": 2131755008 }, { "epoch": 0.65, "learning_rate": 0.0003574867597496389, "loss": 0.0678, "theoretical_loss": 3.440738572974212, "tokens_seen": 2132017152 }, { "epoch": 0.65, "learning_rate": 0.0003574065158080565, "loss": 0.0691, "theoretical_loss": 3.440704031855926, "tokens_seen": 2132279296 }, { "epoch": 0.65, "learning_rate": 0.0003573262718664741, "loss": 0.0695, "theoretical_loss": 3.440669496172746, "tokens_seen": 2132541440 }, { "epoch": 0.65, "learning_rate": 0.0003572460279248917, "loss": 0.0684, "theoretical_loss": 3.4406349659231488, "tokens_seen": 2132803584 }, { "epoch": 0.65, "learning_rate": 0.0003571657839833093, "loss": 0.064, "theoretical_loss": 3.4406004411056124, "tokens_seen": 2133065728 }, { "epoch": 0.65, "learning_rate": 0.0003570855400417268, "loss": 0.0643, "theoretical_loss": 3.4405659217186138, "tokens_seen": 2133327872 }, { "epoch": 0.65, "learning_rate": 0.00035700529610014443, "loss": 0.0643, "theoretical_loss": 3.440531407760633, "tokens_seen": 2133590016 }, { "epoch": 0.65, "learning_rate": 0.00035692505215856204, "loss": 0.0674, "theoretical_loss": 3.4404968992301477, "tokens_seen": 2133852160 }, { "epoch": 0.65, "learning_rate": 0.0003568448082169796, "loss": 0.0641, "theoretical_loss": 3.440462396125639, "tokens_seen": 2134114304 }, { "epoch": 0.65, "learning_rate": 0.0003567645642753972, "loss": 0.068, "theoretical_loss": 3.4404278984455874, "tokens_seen": 2134376448 }, { "epoch": 0.65, "learning_rate": 0.00035668432033381483, "loss": 0.069, "theoretical_loss": 3.4403934061884733, "tokens_seen": 2134638592 }, { "epoch": 0.65, "learning_rate": 0.0003566040763922324, "loss": 0.068, "theoretical_loss": 3.4403589193527786, "tokens_seen": 2134900736 }, { "epoch": 0.65, "learning_rate": 0.00035652383245065, "loss": 0.0677, "theoretical_loss": 3.440324437936986, "tokens_seen": 2135162880 }, { "epoch": 0.65, "learning_rate": 0.00035644358850906756, "loss": 0.0662, "theoretical_loss": 3.4402899619395786, "tokens_seen": 2135425024 }, { "epoch": 0.65, "learning_rate": 0.00035636334456748517, "loss": 0.0632, "theoretical_loss": 3.440255491359039, "tokens_seen": 2135687168 }, { "epoch": 0.65, "learning_rate": 0.00035628310062590273, "loss": 0.0672, "theoretical_loss": 3.440221026193852, "tokens_seen": 2135949312 }, { "epoch": 0.65, "learning_rate": 0.00035620285668432034, "loss": 0.0645, "theoretical_loss": 3.4401865664425024, "tokens_seen": 2136211456 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.0002571629884187132, "objective/train/docs_used": 778288, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3589222431182861, "objective/train/original_loss": 1.3589223623275757, "objective/train/theoretical_loss": 3.440152112103476, "objective/train/tokens_used": 2156933600, "objective/train/value_avg": -0.00910186767578125, "objective/train/value_loss": 0.00038075895281508565, "objective/train/value_max": -4.363059997558594e-05, "objective/train/value_min": -0.90185546875, "objective/train/value_reward_corr": 0.6478648047552944, "objective/train/value_std": 0.016845703125, "objective/train/weight_avg": 1.0004286766052246, "objective/train/weighted_lm_loss": 1.358363389968872, "objective/train/weights_max": 1.8600143194198608, "objective/train/weights_min": 0.3690122961997986, "theoretical_loss": 3.440152112103476, "tokens_seen": 2136473600 }, { "epoch": 0.65, "learning_rate": 0.00035612261274273795, "loss": 0.0661, "theoretical_loss": 3.440152112103476, "tokens_seen": 2136473600 }, { "epoch": 0.65, "learning_rate": 0.0003560423688011555, "loss": 0.0669, "theoretical_loss": 3.4401176631752577, "tokens_seen": 2136735744 }, { "epoch": 0.65, "learning_rate": 0.0003559621248595731, "loss": 0.0658, "theoretical_loss": 3.4400832196563353, "tokens_seen": 2136997888 }, { "epoch": 0.65, "learning_rate": 0.0003558818809179907, "loss": 0.0669, "theoretical_loss": 3.440048781545195, "tokens_seen": 2137260032 }, { "epoch": 0.65, "learning_rate": 0.0003558016369764083, "loss": 0.0691, "theoretical_loss": 3.440014348840325, "tokens_seen": 2137522176 }, { "epoch": 0.65, "learning_rate": 0.00035572139303482585, "loss": 0.0674, "theoretical_loss": 3.439979921540214, "tokens_seen": 2137784320 }, { "epoch": 0.65, "learning_rate": 0.00035564114909324347, "loss": 0.0666, "theoretical_loss": 3.4399454996433514, "tokens_seen": 2138046464 }, { "epoch": 0.65, "learning_rate": 0.0003555609051516611, "loss": 0.0686, "theoretical_loss": 3.4399110831482256, "tokens_seen": 2138308608 }, { "epoch": 0.65, "learning_rate": 0.00035548066121007864, "loss": 0.0673, "theoretical_loss": 3.4398766720533276, "tokens_seen": 2138570752 }, { "epoch": 0.65, "learning_rate": 0.00035540041726849625, "loss": 0.0692, "theoretical_loss": 3.4398422663571484, "tokens_seen": 2138832896 }, { "epoch": 0.65, "learning_rate": 0.0003553201733269138, "loss": 0.0663, "theoretical_loss": 3.439807866058179, "tokens_seen": 2139095040 }, { "epoch": 0.65, "learning_rate": 0.0003552399293853314, "loss": 0.0697, "theoretical_loss": 3.439773471154912, "tokens_seen": 2139357184 }, { "epoch": 0.65, "learning_rate": 0.000355159685443749, "loss": 0.0675, "theoretical_loss": 3.4397390816458397, "tokens_seen": 2139619328 }, { "epoch": 0.65, "learning_rate": 0.0003550794415021666, "loss": 0.0674, "theoretical_loss": 3.4397046975294554, "tokens_seen": 2139881472 }, { "epoch": 0.65, "learning_rate": 0.0003549991975605842, "loss": 0.069, "theoretical_loss": 3.4396703188042537, "tokens_seen": 2140143616 }, { "epoch": 0.65, "learning_rate": 0.00035491895361900176, "loss": 0.0653, "theoretical_loss": 3.4396359454687278, "tokens_seen": 2140405760 }, { "epoch": 0.65, "learning_rate": 0.0003548387096774194, "loss": 0.0649, "theoretical_loss": 3.4396015775213735, "tokens_seen": 2140667904 }, { "epoch": 0.65, "learning_rate": 0.000354758465735837, "loss": 0.0664, "theoretical_loss": 3.439567214960687, "tokens_seen": 2140930048 }, { "epoch": 0.65, "learning_rate": 0.0003546782217942545, "loss": 0.0648, "theoretical_loss": 3.439532857785164, "tokens_seen": 2141192192 }, { "epoch": 0.65, "learning_rate": 0.0003545979778526721, "loss": 0.0658, "theoretical_loss": 3.4394985059933014, "tokens_seen": 2141454336 }, { "epoch": 0.65, "learning_rate": 0.0003545177339110897, "loss": 0.0667, "theoretical_loss": 3.439464159583597, "tokens_seen": 2141716480 }, { "epoch": 0.65, "learning_rate": 0.00035443748996950733, "loss": 0.0654, "theoretical_loss": 3.4394298185545487, "tokens_seen": 2141978624 }, { "epoch": 0.65, "learning_rate": 0.0003543572460279249, "loss": 0.0683, "theoretical_loss": 3.439395482904655, "tokens_seen": 2142240768 }, { "epoch": 0.65, "learning_rate": 0.0003542770020863425, "loss": 0.0649, "theoretical_loss": 3.439361152632416, "tokens_seen": 2142502912 }, { "epoch": 0.65, "learning_rate": 0.0003541967581447601, "loss": 0.0662, "theoretical_loss": 3.4393268277363305, "tokens_seen": 2142765056 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.00045191447134129703, "objective/train/docs_used": 780500, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3032023906707764, "objective/train/original_loss": 1.3032021522521973, "objective/train/theoretical_loss": 3.4392925082149004, "objective/train/tokens_used": 2163487200, "objective/train/value_avg": -0.006439208984375, "objective/train/value_loss": 0.00019448416423983872, "objective/train/value_max": -4.363059997558594e-05, "objective/train/value_min": -0.46142578125, "objective/train/value_reward_corr": 0.6783022603004962, "objective/train/value_std": 0.012542724609375, "objective/train/weight_avg": 1.0005395412445068, "objective/train/weighted_lm_loss": 1.3039817810058594, "objective/train/weights_max": 1.1519769430160522, "objective/train/weights_min": 0.3689151704311371, "theoretical_loss": 3.4392925082149004, "tokens_seen": 2143027200 }, { "epoch": 0.65, "learning_rate": 0.0003541165142031776, "loss": 0.0683, "theoretical_loss": 3.4392925082149004, "tokens_seen": 2143027200 }, { "epoch": 0.65, "learning_rate": 0.00035403627026159523, "loss": 0.0679, "theoretical_loss": 3.4392581940666256, "tokens_seen": 2143289344 }, { "epoch": 0.65, "learning_rate": 0.00035395602632001284, "loss": 0.0647, "theoretical_loss": 3.4392238852900086, "tokens_seen": 2143551488 }, { "epoch": 0.65, "learning_rate": 0.00035387578237843046, "loss": 0.0665, "theoretical_loss": 3.4391895818835514, "tokens_seen": 2143813632 }, { "epoch": 0.65, "learning_rate": 0.000353795538436848, "loss": 0.0639, "theoretical_loss": 3.439155283845757, "tokens_seen": 2144075776 }, { "epoch": 0.65, "learning_rate": 0.00035371529449526563, "loss": 0.0688, "theoretical_loss": 3.4391209911751286, "tokens_seen": 2144337920 }, { "epoch": 0.65, "learning_rate": 0.00035363505055368324, "loss": 0.0654, "theoretical_loss": 3.4390867038701716, "tokens_seen": 2144600064 }, { "epoch": 0.65, "learning_rate": 0.0003535548066121008, "loss": 0.0669, "theoretical_loss": 3.439052421929389, "tokens_seen": 2144862208 }, { "epoch": 0.65, "learning_rate": 0.00035347456267051836, "loss": 0.0646, "theoretical_loss": 3.439018145351287, "tokens_seen": 2145124352 }, { "epoch": 0.65, "learning_rate": 0.00035339431872893597, "loss": 0.0681, "theoretical_loss": 3.4389838741343715, "tokens_seen": 2145386496 }, { "epoch": 0.65, "learning_rate": 0.0003533140747873536, "loss": 0.0661, "theoretical_loss": 3.438949608277149, "tokens_seen": 2145648640 }, { "epoch": 0.65, "learning_rate": 0.00035323383084577114, "loss": 0.065, "theoretical_loss": 3.438915347778127, "tokens_seen": 2145910784 }, { "epoch": 0.65, "learning_rate": 0.00035315358690418875, "loss": 0.0686, "theoretical_loss": 3.4388810926358127, "tokens_seen": 2146172928 }, { "epoch": 0.65, "learning_rate": 0.00035307334296260637, "loss": 0.0652, "theoretical_loss": 3.4388468428487142, "tokens_seen": 2146435072 }, { "epoch": 0.65, "learning_rate": 0.0003529930990210239, "loss": 0.0658, "theoretical_loss": 3.4388125984153413, "tokens_seen": 2146697216 }, { "epoch": 0.65, "learning_rate": 0.0003529128550794415, "loss": 0.0666, "theoretical_loss": 3.4387783593342025, "tokens_seen": 2146959360 }, { "epoch": 0.65, "learning_rate": 0.0003528326111378591, "loss": 0.07, "theoretical_loss": 3.438744125603809, "tokens_seen": 2147221504 }, { "epoch": 0.65, "learning_rate": 0.00035275236719627665, "loss": 0.0652, "theoretical_loss": 3.4387098972226706, "tokens_seen": 2147483648 }, { "epoch": 0.65, "learning_rate": 0.00035267212325469427, "loss": 0.0623, "theoretical_loss": 3.4386756741892994, "tokens_seen": 2147745792 }, { "epoch": 0.65, "learning_rate": 0.0003525918793131119, "loss": 0.0694, "theoretical_loss": 3.4386414565022063, "tokens_seen": 2148007936 }, { "epoch": 0.65, "learning_rate": 0.0003525116353715295, "loss": 0.0669, "theoretical_loss": 3.438607244159905, "tokens_seen": 2148270080 }, { "epoch": 0.65, "learning_rate": 0.00035243139142994705, "loss": 0.0656, "theoretical_loss": 3.4385730371609076, "tokens_seen": 2148532224 }, { "epoch": 0.65, "learning_rate": 0.0003523511474883646, "loss": 0.0672, "theoretical_loss": 3.438538835503728, "tokens_seen": 2148794368 }, { "epoch": 0.65, "learning_rate": 0.0003522709035467822, "loss": 0.0679, "theoretical_loss": 3.438504639186881, "tokens_seen": 2149056512 }, { "epoch": 0.65, "learning_rate": 0.0003521906596051998, "loss": 0.0654, "theoretical_loss": 3.4384704482088813, "tokens_seen": 2149318656 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.00025573305902071297, "objective/train/docs_used": 782928, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.186272382736206, "objective/train/original_loss": 1.186272144317627, "objective/train/theoretical_loss": 3.4384362625682434, "objective/train/tokens_used": 2170040800, "objective/train/value_avg": -0.006961822509765625, "objective/train/value_loss": 0.00010668325558071956, "objective/train/value_max": -3.427267074584961e-05, "objective/train/value_min": -0.501953125, "objective/train/value_reward_corr": 0.7916194071649223, "objective/train/value_std": 0.012664794921875, "objective/train/weight_avg": 1.0003083944320679, "objective/train/weighted_lm_loss": 1.1869118213653564, "objective/train/weights_max": 1.146925687789917, "objective/train/weights_min": 0.8217722177505493, "theoretical_loss": 3.4384362625682434, "tokens_seen": 2149580800 }, { "epoch": 0.65, "learning_rate": 0.0003521104156636174, "loss": 0.0637, "theoretical_loss": 3.4384362625682434, "tokens_seen": 2149580800 }, { "epoch": 0.65, "learning_rate": 0.000352030171722035, "loss": 0.0656, "theoretical_loss": 3.438402082263485, "tokens_seen": 2149842944 }, { "epoch": 0.65, "learning_rate": 0.0003519499277804526, "loss": 0.0681, "theoretical_loss": 3.4383679072931215, "tokens_seen": 2150105088 }, { "epoch": 0.65, "learning_rate": 0.0003518696838388702, "loss": 0.0655, "theoretical_loss": 3.438333737655671, "tokens_seen": 2150367232 }, { "epoch": 0.65, "learning_rate": 0.0003517894398972878, "loss": 0.0688, "theoretical_loss": 3.43829957334965, "tokens_seen": 2150629376 }, { "epoch": 0.65, "learning_rate": 0.00035170919595570535, "loss": 0.0655, "theoretical_loss": 3.4382654143735785, "tokens_seen": 2150891520 }, { "epoch": 0.65, "learning_rate": 0.0003516289520141229, "loss": 0.0679, "theoretical_loss": 3.438231260725975, "tokens_seen": 2151153664 }, { "epoch": 0.65, "learning_rate": 0.0003515487080725405, "loss": 0.0662, "theoretical_loss": 3.4381971124053594, "tokens_seen": 2151415808 }, { "epoch": 0.65, "learning_rate": 0.00035146846413095813, "loss": 0.0683, "theoretical_loss": 3.438162969410251, "tokens_seen": 2151677952 }, { "epoch": 0.65, "learning_rate": 0.00035138822018937574, "loss": 0.0673, "theoretical_loss": 3.438128831739171, "tokens_seen": 2151940096 }, { "epoch": 0.65, "learning_rate": 0.0003513079762477933, "loss": 0.0687, "theoretical_loss": 3.4380946993906414, "tokens_seen": 2152202240 }, { "epoch": 0.65, "learning_rate": 0.0003512277323062109, "loss": 0.0664, "theoretical_loss": 3.4380605723631836, "tokens_seen": 2152464384 }, { "epoch": 0.65, "learning_rate": 0.00035114748836462847, "loss": 0.0676, "theoretical_loss": 3.4380264506553204, "tokens_seen": 2152726528 }, { "epoch": 0.65, "learning_rate": 0.00035106724442304603, "loss": 0.0669, "theoretical_loss": 3.4379923342655747, "tokens_seen": 2152988672 }, { "epoch": 0.65, "learning_rate": 0.00035098700048146364, "loss": 0.0671, "theoretical_loss": 3.437958223192471, "tokens_seen": 2153250816 }, { "epoch": 0.65, "learning_rate": 0.00035090675653988126, "loss": 0.0693, "theoretical_loss": 3.4379241174345325, "tokens_seen": 2153512960 }, { "epoch": 0.65, "learning_rate": 0.00035082651259829887, "loss": 0.0635, "theoretical_loss": 3.437890016990285, "tokens_seen": 2153775104 }, { "epoch": 0.65, "learning_rate": 0.0003507462686567164, "loss": 0.0669, "theoretical_loss": 3.4378559218582536, "tokens_seen": 2154037248 }, { "epoch": 0.65, "learning_rate": 0.00035066602471513404, "loss": 0.0651, "theoretical_loss": 3.4378218320369647, "tokens_seen": 2154299392 }, { "epoch": 0.65, "learning_rate": 0.0003505857807735516, "loss": 0.0652, "theoretical_loss": 3.437787747524945, "tokens_seen": 2154561536 }, { "epoch": 0.65, "learning_rate": 0.00035050553683196916, "loss": 0.0665, "theoretical_loss": 3.4377536683207217, "tokens_seen": 2154823680 }, { "epoch": 0.65, "learning_rate": 0.00035042529289038677, "loss": 0.0672, "theoretical_loss": 3.4377195944228225, "tokens_seen": 2155085824 }, { "epoch": 0.65, "learning_rate": 0.0003503450489488044, "loss": 0.0654, "theoretical_loss": 3.4376855258297763, "tokens_seen": 2155347968 }, { "epoch": 0.65, "learning_rate": 0.00035026480500722194, "loss": 0.0664, "theoretical_loss": 3.4376514625401113, "tokens_seen": 2155610112 }, { "epoch": 0.65, "learning_rate": 0.00035018456106563955, "loss": 0.0665, "theoretical_loss": 3.437617404552358, "tokens_seen": 2155872256 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.00036321027437224984, "objective/train/docs_used": 785270, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3421927690505981, "objective/train/original_loss": 1.3421927690505981, "objective/train/theoretical_loss": 3.4375833518650465, "objective/train/tokens_used": 2176594400, "objective/train/value_avg": -0.0075225830078125, "objective/train/value_loss": 0.00023389572743326426, "objective/train/value_max": -5.346536636352539e-05, "objective/train/value_min": -0.8916015625, "objective/train/value_reward_corr": 0.7332820517109091, "objective/train/value_std": 0.0157470703125, "objective/train/weight_avg": 1.000470519065857, "objective/train/weighted_lm_loss": 1.3425078392028809, "objective/train/weights_max": 2.007188320159912, "objective/train/weights_min": 0.24055545032024384, "theoretical_loss": 3.4375833518650465, "tokens_seen": 2156134400 }, { "epoch": 0.65, "learning_rate": 0.00035010431712405717, "loss": 0.0679, "theoretical_loss": 3.4375833518650465, "tokens_seen": 2156134400 }, { "epoch": 0.65, "learning_rate": 0.0003500240731824748, "loss": 0.0662, "theoretical_loss": 3.4375493044767076, "tokens_seen": 2156396544 }, { "epoch": 0.65, "learning_rate": 0.0003499438292408923, "loss": 0.065, "theoretical_loss": 3.4375152623858725, "tokens_seen": 2156658688 }, { "epoch": 0.65, "learning_rate": 0.0003498635852993099, "loss": 0.0664, "theoretical_loss": 3.437481225591073, "tokens_seen": 2156920832 }, { "epoch": 0.65, "learning_rate": 0.0003497833413577275, "loss": 0.0664, "theoretical_loss": 3.4374471940908418, "tokens_seen": 2157182976 }, { "epoch": 0.65, "learning_rate": 0.00034970309741614507, "loss": 0.0648, "theoretical_loss": 3.4374131678837125, "tokens_seen": 2157445120 }, { "epoch": 0.65, "learning_rate": 0.0003496228534745627, "loss": 0.0661, "theoretical_loss": 3.4373791469682184, "tokens_seen": 2157707264 }, { "epoch": 0.65, "learning_rate": 0.0003495426095329803, "loss": 0.0662, "theoretical_loss": 3.437345131342894, "tokens_seen": 2157969408 }, { "epoch": 0.65, "learning_rate": 0.0003494623655913979, "loss": 0.0665, "theoretical_loss": 3.437311121006274, "tokens_seen": 2158231552 }, { "epoch": 0.65, "learning_rate": 0.0003493821216498154, "loss": 0.0669, "theoretical_loss": 3.4372771159568942, "tokens_seen": 2158493696 }, { "epoch": 0.65, "learning_rate": 0.000349301877708233, "loss": 0.0678, "theoretical_loss": 3.43724311619329, "tokens_seen": 2158755840 }, { "epoch": 0.65, "learning_rate": 0.00034922163376665063, "loss": 0.0662, "theoretical_loss": 3.437209121713999, "tokens_seen": 2159017984 }, { "epoch": 0.65, "learning_rate": 0.0003491413898250682, "loss": 0.0669, "theoretical_loss": 3.4371751325175586, "tokens_seen": 2159280128 }, { "epoch": 0.65, "learning_rate": 0.0003490611458834858, "loss": 0.0656, "theoretical_loss": 3.437141148602505, "tokens_seen": 2159542272 }, { "epoch": 0.65, "learning_rate": 0.0003489809019419034, "loss": 0.0645, "theoretical_loss": 3.437107169967378, "tokens_seen": 2159804416 }, { "epoch": 0.65, "learning_rate": 0.00034890065800032103, "loss": 0.0672, "theoretical_loss": 3.437073196610716, "tokens_seen": 2160066560 }, { "epoch": 0.65, "learning_rate": 0.0003488204140587386, "loss": 0.0643, "theoretical_loss": 3.4370392285310594, "tokens_seen": 2160328704 }, { "epoch": 0.65, "learning_rate": 0.00034874017011715615, "loss": 0.0642, "theoretical_loss": 3.437005265726947, "tokens_seen": 2160590848 }, { "epoch": 0.65, "learning_rate": 0.00034865992617557376, "loss": 0.0654, "theoretical_loss": 3.4369713081969206, "tokens_seen": 2160852992 }, { "epoch": 0.65, "learning_rate": 0.0003485796822339913, "loss": 0.068, "theoretical_loss": 3.436937355939521, "tokens_seen": 2161115136 }, { "epoch": 0.65, "learning_rate": 0.00034849943829240893, "loss": 0.0683, "theoretical_loss": 3.4369034089532904, "tokens_seen": 2161377280 }, { "epoch": 0.66, "learning_rate": 0.00034841919435082654, "loss": 0.0655, "theoretical_loss": 3.436869467236771, "tokens_seen": 2161639424 }, { "epoch": 0.66, "learning_rate": 0.0003483389504092441, "loss": 0.0644, "theoretical_loss": 3.4368355307885063, "tokens_seen": 2161901568 }, { "epoch": 0.66, "learning_rate": 0.0003482587064676617, "loss": 0.0667, "theoretical_loss": 3.436801599607039, "tokens_seen": 2162163712 }, { "epoch": 0.66, "learning_rate": 0.00034817846252607927, "loss": 0.0681, "theoretical_loss": 3.4367676736909143, "tokens_seen": 2162425856 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.0004280103603377938, "objective/train/docs_used": 787745, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.441954493522644, "objective/train/original_loss": 1.4419546127319336, "objective/train/theoretical_loss": 3.4367337530386766, "objective/train/tokens_used": 2183148000, "objective/train/value_avg": -0.00835418701171875, "objective/train/value_loss": 0.00033444137079641223, "objective/train/value_max": -5.829334259033203e-05, "objective/train/value_min": -0.71337890625, "objective/train/value_reward_corr": 0.7118774960473635, "objective/train/value_std": 0.018341064453125, "objective/train/weight_avg": 1.0005724430084229, "objective/train/weighted_lm_loss": 1.4417777061462402, "objective/train/weights_max": 1.43697190284729, "objective/train/weights_min": 0.22480729222297668, "theoretical_loss": 3.4367337530386766, "tokens_seen": 2162688000 }, { "epoch": 0.66, "learning_rate": 0.0003480982185844969, "loss": 0.067, "theoretical_loss": 3.4367337530386766, "tokens_seen": 2162688000 }, { "epoch": 0.66, "learning_rate": 0.00034801797464291444, "loss": 0.0679, "theoretical_loss": 3.436699837648871, "tokens_seen": 2162950144 }, { "epoch": 0.66, "learning_rate": 0.00034793773070133206, "loss": 0.0692, "theoretical_loss": 3.4366659275200444, "tokens_seen": 2163212288 }, { "epoch": 0.66, "learning_rate": 0.00034785748675974967, "loss": 0.0677, "theoretical_loss": 3.436632022650742, "tokens_seen": 2163474432 }, { "epoch": 0.66, "learning_rate": 0.0003477772428181672, "loss": 0.0683, "theoretical_loss": 3.4365981230395115, "tokens_seen": 2163736576 }, { "epoch": 0.66, "learning_rate": 0.00034769699887658484, "loss": 0.0653, "theoretical_loss": 3.436564228684901, "tokens_seen": 2163998720 }, { "epoch": 0.66, "learning_rate": 0.0003476167549350024, "loss": 0.0661, "theoretical_loss": 3.436530339585458, "tokens_seen": 2164260864 }, { "epoch": 0.66, "learning_rate": 0.00034753651099342, "loss": 0.0668, "theoretical_loss": 3.4364964557397317, "tokens_seen": 2164523008 }, { "epoch": 0.66, "learning_rate": 0.00034745626705183757, "loss": 0.0663, "theoretical_loss": 3.436462577146272, "tokens_seen": 2164785152 }, { "epoch": 0.66, "learning_rate": 0.0003473760231102552, "loss": 0.0661, "theoretical_loss": 3.4364287038036276, "tokens_seen": 2165047296 }, { "epoch": 0.66, "learning_rate": 0.0003472957791686728, "loss": 0.067, "theoretical_loss": 3.4363948357103506, "tokens_seen": 2165309440 }, { "epoch": 0.66, "learning_rate": 0.00034721553522709035, "loss": 0.0668, "theoretical_loss": 3.436360972864991, "tokens_seen": 2165571584 }, { "epoch": 0.66, "learning_rate": 0.00034713529128550796, "loss": 0.0668, "theoretical_loss": 3.4363271152661006, "tokens_seen": 2165833728 }, { "epoch": 0.66, "learning_rate": 0.0003470550473439256, "loss": 0.0682, "theoretical_loss": 3.4362932629122325, "tokens_seen": 2166095872 }, { "epoch": 0.66, "learning_rate": 0.00034697480340234314, "loss": 0.0729, "theoretical_loss": 3.436259415801939, "tokens_seen": 2166358016 }, { "epoch": 0.66, "learning_rate": 0.0003468945594607607, "loss": 0.0654, "theoretical_loss": 3.436225573933773, "tokens_seen": 2166620160 }, { "epoch": 0.66, "learning_rate": 0.0003468143155191783, "loss": 0.0669, "theoretical_loss": 3.43619173730629, "tokens_seen": 2166882304 }, { "epoch": 0.66, "learning_rate": 0.0003467340715775959, "loss": 0.0677, "theoretical_loss": 3.4361579059180425, "tokens_seen": 2167144448 }, { "epoch": 0.66, "learning_rate": 0.0003466538276360135, "loss": 0.0677, "theoretical_loss": 3.4361240797675876, "tokens_seen": 2167406592 }, { "epoch": 0.66, "learning_rate": 0.0003465735836944311, "loss": 0.0691, "theoretical_loss": 3.43609025885348, "tokens_seen": 2167668736 }, { "epoch": 0.66, "learning_rate": 0.0003464933397528487, "loss": 0.0662, "theoretical_loss": 3.436056443174276, "tokens_seen": 2167930880 }, { "epoch": 0.66, "learning_rate": 0.0003464130958112662, "loss": 0.0686, "theoretical_loss": 3.436022632728533, "tokens_seen": 2168193024 }, { "epoch": 0.66, "learning_rate": 0.0003463328518696838, "loss": 0.07, "theoretical_loss": 3.4359888275148083, "tokens_seen": 2168455168 }, { "epoch": 0.66, "learning_rate": 0.00034625260792810143, "loss": 0.0653, "theoretical_loss": 3.4359550275316595, "tokens_seen": 2168717312 }, { "epoch": 0.66, "learning_rate": 0.00034617236398651904, "loss": 0.0662, "theoretical_loss": 3.4359212327776456, "tokens_seen": 2168979456 }, { "epoch": 0.66, "objective/train/advantage_avg": -0.00015192697173915803, "objective/train/docs_used": 790125, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4038983583450317, "objective/train/original_loss": 1.4038983583450317, "objective/train/theoretical_loss": 3.4358874432513256, "objective/train/tokens_used": 2189701600, "objective/train/value_avg": -0.00885772705078125, "objective/train/value_loss": 0.00030142103787511587, "objective/train/value_max": -1.5437602996826172e-05, "objective/train/value_min": -0.87890625, "objective/train/value_reward_corr": 0.8012933605623311, "objective/train/value_std": 0.0206451416015625, "objective/train/weight_avg": 0.9999855756759644, "objective/train/weighted_lm_loss": 1.4039148092269897, "objective/train/weights_max": 1.7211087942123413, "objective/train/weights_min": 0.3775528371334076, "theoretical_loss": 3.4358874432513256, "tokens_seen": 2169241600 }, { "epoch": 0.66, "learning_rate": 0.0003460921200449366, "loss": 0.067, "theoretical_loss": 3.4358874432513256, "tokens_seen": 2169241600 }, { "epoch": 0.66, "learning_rate": 0.0003460118761033542, "loss": 0.0659, "theoretical_loss": 3.435853658951259, "tokens_seen": 2169503744 }, { "epoch": 0.66, "learning_rate": 0.00034593163216177183, "loss": 0.064, "theoretical_loss": 3.4358198798760067, "tokens_seen": 2169765888 }, { "epoch": 0.66, "learning_rate": 0.00034585138822018933, "loss": 0.066, "theoretical_loss": 3.4357861060241293, "tokens_seen": 2170028032 }, { "epoch": 0.66, "learning_rate": 0.00034577114427860695, "loss": 0.0669, "theoretical_loss": 3.4357523373941876, "tokens_seen": 2170290176 }, { "epoch": 0.66, "learning_rate": 0.00034569090033702456, "loss": 0.0676, "theoretical_loss": 3.4357185739847447, "tokens_seen": 2170552320 }, { "epoch": 0.66, "learning_rate": 0.00034561065639544217, "loss": 0.0659, "theoretical_loss": 3.4356848157943625, "tokens_seen": 2170814464 }, { "epoch": 0.66, "learning_rate": 0.00034553041245385973, "loss": 0.0686, "theoretical_loss": 3.4356510628216044, "tokens_seen": 2171076608 }, { "epoch": 0.66, "learning_rate": 0.00034545016851227734, "loss": 0.0696, "theoretical_loss": 3.435617315065034, "tokens_seen": 2171338752 }, { "epoch": 0.66, "learning_rate": 0.00034536992457069495, "loss": 0.0679, "theoretical_loss": 3.435583572523216, "tokens_seen": 2171600896 }, { "epoch": 0.66, "learning_rate": 0.0003452896806291125, "loss": 0.0673, "theoretical_loss": 3.4355498351947142, "tokens_seen": 2171863040 }, { "epoch": 0.66, "learning_rate": 0.00034520943668753007, "loss": 0.0668, "theoretical_loss": 3.435516103078095, "tokens_seen": 2172125184 }, { "epoch": 0.66, "learning_rate": 0.0003451291927459477, "loss": 0.0689, "theoretical_loss": 3.435482376171924, "tokens_seen": 2172387328 }, { "epoch": 0.66, "learning_rate": 0.0003450489488043653, "loss": 0.0654, "theoretical_loss": 3.435448654474768, "tokens_seen": 2172649472 }, { "epoch": 0.66, "learning_rate": 0.00034496870486278285, "loss": 0.0674, "theoretical_loss": 3.435414937985194, "tokens_seen": 2172911616 }, { "epoch": 0.66, "learning_rate": 0.00034488846092120047, "loss": 0.0689, "theoretical_loss": 3.43538122670177, "tokens_seen": 2173173760 }, { "epoch": 0.66, "learning_rate": 0.0003448082169796181, "loss": 0.0689, "theoretical_loss": 3.435347520623063, "tokens_seen": 2173435904 }, { "epoch": 0.66, "learning_rate": 0.00034472797303803564, "loss": 0.0677, "theoretical_loss": 3.435313819747644, "tokens_seen": 2173698048 }, { "epoch": 0.66, "learning_rate": 0.0003446477290964532, "loss": 0.0664, "theoretical_loss": 3.4352801240740805, "tokens_seen": 2173960192 }, { "epoch": 0.66, "learning_rate": 0.0003445674851548708, "loss": 0.0643, "theoretical_loss": 3.435246433600943, "tokens_seen": 2174222336 }, { "epoch": 0.66, "learning_rate": 0.00034448724121328837, "loss": 0.0647, "theoretical_loss": 3.4352127483268022, "tokens_seen": 2174484480 }, { "epoch": 0.66, "learning_rate": 0.000344406997271706, "loss": 0.0644, "theoretical_loss": 3.4351790682502297, "tokens_seen": 2174746624 }, { "epoch": 0.66, "learning_rate": 0.0003443267533301236, "loss": 0.0674, "theoretical_loss": 3.435145393369796, "tokens_seen": 2175008768 }, { "epoch": 0.66, "learning_rate": 0.0003442465093885412, "loss": 0.067, "theoretical_loss": 3.4351117236840745, "tokens_seen": 2175270912 }, { "epoch": 0.66, "learning_rate": 0.00034416626544695876, "loss": 0.0643, "theoretical_loss": 3.4350780591916372, "tokens_seen": 2175533056 }, { "epoch": 0.66, "objective/train/advantage_avg": 7.212981290649623e-05, "objective/train/docs_used": 792571, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3175489902496338, "objective/train/original_loss": 1.3175489902496338, "objective/train/theoretical_loss": 3.4350443998910576, "objective/train/tokens_used": 2196255200, "objective/train/value_avg": -0.01052093505859375, "objective/train/value_loss": 0.0004599344392772764, "objective/train/value_max": -4.7206878662109375e-05, "objective/train/value_min": -0.9482421875, "objective/train/value_reward_corr": 0.6958288722820336, "objective/train/value_std": 0.02197265625, "objective/train/weight_avg": 1.0002882480621338, "objective/train/weighted_lm_loss": 1.3171955347061157, "objective/train/weights_max": 2.5474281311035156, "objective/train/weights_min": 0.3810078501701355, "theoretical_loss": 3.4350443998910576, "tokens_seen": 2175795200 }, { "epoch": 0.66, "learning_rate": 0.0003440860215053764, "loss": 0.065, "theoretical_loss": 3.4350443998910576, "tokens_seen": 2175795200 }, { "epoch": 0.66, "learning_rate": 0.00034400577756379393, "loss": 0.0674, "theoretical_loss": 3.435010745780909, "tokens_seen": 2176057344 }, { "epoch": 0.66, "learning_rate": 0.0003439255336222115, "loss": 0.0651, "theoretical_loss": 3.4349770968597677, "tokens_seen": 2176319488 }, { "epoch": 0.66, "learning_rate": 0.0003438452896806291, "loss": 0.0652, "theoretical_loss": 3.4349434531262073, "tokens_seen": 2176581632 }, { "epoch": 0.66, "learning_rate": 0.0003437650457390467, "loss": 0.0666, "theoretical_loss": 3.4349098145788033, "tokens_seen": 2176843776 }, { "epoch": 0.66, "learning_rate": 0.00034368480179746433, "loss": 0.0672, "theoretical_loss": 3.434876181216133, "tokens_seen": 2177105920 }, { "epoch": 0.66, "learning_rate": 0.0003436045578558819, "loss": 0.0648, "theoretical_loss": 3.4348425530367717, "tokens_seen": 2177368064 }, { "epoch": 0.66, "learning_rate": 0.0003435243139142995, "loss": 0.0672, "theoretical_loss": 3.4348089300392974, "tokens_seen": 2177630208 }, { "epoch": 0.66, "learning_rate": 0.00034344406997271706, "loss": 0.0661, "theoretical_loss": 3.434775312222288, "tokens_seen": 2177892352 }, { "epoch": 0.66, "learning_rate": 0.0003433638260311346, "loss": 0.0677, "theoretical_loss": 3.434741699584322, "tokens_seen": 2178154496 }, { "epoch": 0.66, "learning_rate": 0.00034328358208955223, "loss": 0.0678, "theoretical_loss": 3.434708092123978, "tokens_seen": 2178416640 }, { "epoch": 0.66, "learning_rate": 0.00034320333814796984, "loss": 0.0675, "theoretical_loss": 3.434674489839836, "tokens_seen": 2178678784 }, { "epoch": 0.66, "learning_rate": 0.00034312309420638746, "loss": 0.0649, "theoretical_loss": 3.4346408927304757, "tokens_seen": 2178940928 }, { "epoch": 0.66, "learning_rate": 0.000343042850264805, "loss": 0.067, "theoretical_loss": 3.434607300794478, "tokens_seen": 2179203072 }, { "epoch": 0.66, "learning_rate": 0.00034296260632322263, "loss": 0.0645, "theoretical_loss": 3.434573714030424, "tokens_seen": 2179465216 }, { "epoch": 0.66, "learning_rate": 0.0003428823623816402, "loss": 0.0647, "theoretical_loss": 3.4345401324368954, "tokens_seen": 2179727360 }, { "epoch": 0.66, "learning_rate": 0.00034280211844005774, "loss": 0.067, "theoretical_loss": 3.4345065560124746, "tokens_seen": 2179989504 }, { "epoch": 0.66, "learning_rate": 0.00034272187449847536, "loss": 0.0659, "theoretical_loss": 3.434472984755745, "tokens_seen": 2180251648 }, { "epoch": 0.66, "learning_rate": 0.00034264163055689297, "loss": 0.0675, "theoretical_loss": 3.434439418665289, "tokens_seen": 2180513792 }, { "epoch": 0.66, "learning_rate": 0.00034256138661531053, "loss": 0.0639, "theoretical_loss": 3.434405857739691, "tokens_seen": 2180775936 }, { "epoch": 0.66, "learning_rate": 0.00034248114267372814, "loss": 0.0669, "theoretical_loss": 3.434372301977536, "tokens_seen": 2181038080 }, { "epoch": 0.66, "learning_rate": 0.00034240089873214575, "loss": 0.0671, "theoretical_loss": 3.4343387513774095, "tokens_seen": 2181300224 }, { "epoch": 0.66, "learning_rate": 0.00034232065479056337, "loss": 0.0697, "theoretical_loss": 3.434305205937896, "tokens_seen": 2181562368 }, { "epoch": 0.66, "learning_rate": 0.00034224041084898087, "loss": 0.0679, "theoretical_loss": 3.4342716656575822, "tokens_seen": 2181824512 }, { "epoch": 0.66, "learning_rate": 0.0003421601669073985, "loss": 0.0671, "theoretical_loss": 3.4342381305350553, "tokens_seen": 2182086656 }, { "epoch": 0.66, "objective/train/advantage_avg": -2.115079951181542e-05, "objective/train/docs_used": 794520, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.430501103401184, "objective/train/original_loss": 1.4305012226104736, "objective/train/theoretical_loss": 3.434204600568902, "objective/train/tokens_used": 2202808800, "objective/train/value_avg": -0.006259918212890625, "objective/train/value_loss": 0.0002908243041019887, "objective/train/value_max": -5.650520324707031e-05, "objective/train/value_min": -0.7333984375, "objective/train/value_reward_corr": 0.6777902999039132, "objective/train/value_std": 0.01520538330078125, "objective/train/weight_avg": 1.0001102685928345, "objective/train/weighted_lm_loss": 1.4302117824554443, "objective/train/weights_max": 1.45076322555542, "objective/train/weights_min": 0.3727388083934784, "theoretical_loss": 3.434204600568902, "tokens_seen": 2182348800 }, { "epoch": 0.66, "learning_rate": 0.0003420799229658161, "loss": 0.0696, "theoretical_loss": 3.434204600568902, "tokens_seen": 2182348800 }, { "epoch": 0.66, "learning_rate": 0.00034199967902423365, "loss": 0.07, "theoretical_loss": 3.434171075757711, "tokens_seen": 2182610944 }, { "epoch": 0.66, "learning_rate": 0.00034191943508265127, "loss": 0.0654, "theoretical_loss": 3.4341375561000698, "tokens_seen": 2182873088 }, { "epoch": 0.66, "learning_rate": 0.0003418391911410689, "loss": 0.0656, "theoretical_loss": 3.4341040415945683, "tokens_seen": 2183135232 }, { "epoch": 0.66, "learning_rate": 0.0003417589471994865, "loss": 0.0673, "theoretical_loss": 3.434070532239796, "tokens_seen": 2183397376 }, { "epoch": 0.66, "learning_rate": 0.000341678703257904, "loss": 0.066, "theoretical_loss": 3.4340370280343424, "tokens_seen": 2183659520 }, { "epoch": 0.66, "learning_rate": 0.0003415984593163216, "loss": 0.065, "theoretical_loss": 3.434003528976799, "tokens_seen": 2183921664 }, { "epoch": 0.66, "learning_rate": 0.0003415182153747392, "loss": 0.0663, "theoretical_loss": 3.433970035065756, "tokens_seen": 2184183808 }, { "epoch": 0.66, "learning_rate": 0.0003414379714331568, "loss": 0.0674, "theoretical_loss": 3.4339365462998064, "tokens_seen": 2184445952 }, { "epoch": 0.66, "learning_rate": 0.0003413577274915744, "loss": 0.065, "theoretical_loss": 3.4339030626775413, "tokens_seen": 2184708096 }, { "epoch": 0.66, "learning_rate": 0.000341277483549992, "loss": 0.0653, "theoretical_loss": 3.433869584197555, "tokens_seen": 2184970240 }, { "epoch": 0.66, "learning_rate": 0.0003411972396084096, "loss": 0.0665, "theoretical_loss": 3.43383611085844, "tokens_seen": 2185232384 }, { "epoch": 0.66, "learning_rate": 0.0003411169956668271, "loss": 0.0661, "theoretical_loss": 3.43380264265879, "tokens_seen": 2185494528 }, { "epoch": 0.66, "learning_rate": 0.00034103675172524473, "loss": 0.0652, "theoretical_loss": 3.433769179597201, "tokens_seen": 2185756672 }, { "epoch": 0.66, "learning_rate": 0.00034095650778366235, "loss": 0.0669, "theoretical_loss": 3.433735721672267, "tokens_seen": 2186018816 }, { "epoch": 0.66, "learning_rate": 0.0003408762638420799, "loss": 0.0664, "theoretical_loss": 3.4337022688825836, "tokens_seen": 2186280960 }, { "epoch": 0.66, "learning_rate": 0.0003407960199004975, "loss": 0.0687, "theoretical_loss": 3.4336688212267474, "tokens_seen": 2186543104 }, { "epoch": 0.66, "learning_rate": 0.00034071577595891513, "loss": 0.0695, "theoretical_loss": 3.4336353787033556, "tokens_seen": 2186805248 }, { "epoch": 0.66, "learning_rate": 0.00034063553201733274, "loss": 0.066, "theoretical_loss": 3.4336019413110046, "tokens_seen": 2187067392 }, { "epoch": 0.66, "learning_rate": 0.0003405552880757503, "loss": 0.0653, "theoretical_loss": 3.4335685090482926, "tokens_seen": 2187329536 }, { "epoch": 0.66, "learning_rate": 0.00034047504413416786, "loss": 0.0701, "theoretical_loss": 3.4335350819138184, "tokens_seen": 2187591680 }, { "epoch": 0.66, "learning_rate": 0.00034039480019258547, "loss": 0.0664, "theoretical_loss": 3.433501659906181, "tokens_seen": 2187853824 }, { "epoch": 0.66, "learning_rate": 0.00034031455625100303, "loss": 0.0656, "theoretical_loss": 3.433468243023979, "tokens_seen": 2188115968 }, { "epoch": 0.66, "learning_rate": 0.00034023431230942064, "loss": 0.0658, "theoretical_loss": 3.433434831265814, "tokens_seen": 2188378112 }, { "epoch": 0.66, "learning_rate": 0.00034015406836783826, "loss": 0.0675, "theoretical_loss": 3.4334014246302855, "tokens_seen": 2188640256 }, { "epoch": 0.66, "objective/train/advantage_avg": 4.372426701593213e-05, "objective/train/docs_used": 796788, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2543916702270508, "objective/train/original_loss": 1.2543916702270508, "objective/train/theoretical_loss": 3.4333680231159946, "objective/train/tokens_used": 2209362400, "objective/train/value_avg": -0.0097503662109375, "objective/train/value_loss": 0.00034007534850388765, "objective/train/value_max": -2.7298927307128906e-05, "objective/train/value_min": -0.71875, "objective/train/value_reward_corr": 0.7060483238102327, "objective/train/value_std": 0.0169219970703125, "objective/train/weight_avg": 1.0001929998397827, "objective/train/weighted_lm_loss": 1.2542983293533325, "objective/train/weights_max": 1.3613519668579102, "objective/train/weights_min": 0.3707548677921295, "theoretical_loss": 3.4333680231159946, "tokens_seen": 2188902400 }, { "epoch": 0.66, "learning_rate": 0.0003400738244262558, "loss": 0.0668, "theoretical_loss": 3.4333680231159946, "tokens_seen": 2188902400 }, { "epoch": 0.66, "learning_rate": 0.0003399935804846734, "loss": 0.066, "theoretical_loss": 3.433334626721544, "tokens_seen": 2189164544 }, { "epoch": 0.66, "learning_rate": 0.000339913336543091, "loss": 0.0669, "theoretical_loss": 3.4333012354455352, "tokens_seen": 2189426688 }, { "epoch": 0.66, "learning_rate": 0.0003398330926015086, "loss": 0.0669, "theoretical_loss": 3.433267849286571, "tokens_seen": 2189688832 }, { "epoch": 0.66, "learning_rate": 0.00033975284865992616, "loss": 0.072, "theoretical_loss": 3.4332344682432554, "tokens_seen": 2189950976 }, { "epoch": 0.66, "learning_rate": 0.00033967260471834377, "loss": 0.0674, "theoretical_loss": 3.4332010923141913, "tokens_seen": 2190213120 }, { "epoch": 0.66, "learning_rate": 0.0003395923607767614, "loss": 0.0646, "theoretical_loss": 3.4331677214979845, "tokens_seen": 2190475264 }, { "epoch": 0.66, "learning_rate": 0.00033951211683517894, "loss": 0.069, "theoretical_loss": 3.4331343557932392, "tokens_seen": 2190737408 }, { "epoch": 0.66, "learning_rate": 0.00033943187289359655, "loss": 0.0655, "theoretical_loss": 3.433100995198561, "tokens_seen": 2190999552 }, { "epoch": 0.66, "learning_rate": 0.00033935162895201417, "loss": 0.0672, "theoretical_loss": 3.433067639712556, "tokens_seen": 2191261696 }, { "epoch": 0.66, "learning_rate": 0.0003392713850104317, "loss": 0.0652, "theoretical_loss": 3.4330342893338313, "tokens_seen": 2191523840 }, { "epoch": 0.66, "learning_rate": 0.0003391911410688493, "loss": 0.0638, "theoretical_loss": 3.4330009440609937, "tokens_seen": 2191785984 }, { "epoch": 0.66, "learning_rate": 0.0003391108971272669, "loss": 0.0661, "theoretical_loss": 3.432967603892651, "tokens_seen": 2192048128 }, { "epoch": 0.66, "learning_rate": 0.0003390306531856845, "loss": 0.0696, "theoretical_loss": 3.432934268827412, "tokens_seen": 2192310272 }, { "epoch": 0.66, "learning_rate": 0.00033895040924410207, "loss": 0.0664, "theoretical_loss": 3.4329009388638845, "tokens_seen": 2192572416 }, { "epoch": 0.66, "learning_rate": 0.0003388701653025197, "loss": 0.068, "theoretical_loss": 3.4328676140006786, "tokens_seen": 2192834560 }, { "epoch": 0.66, "learning_rate": 0.0003387899213609373, "loss": 0.0681, "theoretical_loss": 3.4328342942364043, "tokens_seen": 2193096704 }, { "epoch": 0.66, "learning_rate": 0.00033870967741935485, "loss": 0.0648, "theoretical_loss": 3.432800979569672, "tokens_seen": 2193358848 }, { "epoch": 0.66, "learning_rate": 0.0003386294334777724, "loss": 0.0667, "theoretical_loss": 3.4327676699990928, "tokens_seen": 2193620992 }, { "epoch": 0.66, "learning_rate": 0.00033854918953619, "loss": 0.0674, "theoretical_loss": 3.432734365523278, "tokens_seen": 2193883136 }, { "epoch": 0.66, "learning_rate": 0.00033846894559460763, "loss": 0.0694, "theoretical_loss": 3.4327010661408397, "tokens_seen": 2194145280 }, { "epoch": 0.66, "learning_rate": 0.0003383887016530252, "loss": 0.067, "theoretical_loss": 3.432667771850391, "tokens_seen": 2194407424 }, { "epoch": 0.67, "learning_rate": 0.0003383084577114428, "loss": 0.0668, "theoretical_loss": 3.4326344826505446, "tokens_seen": 2194669568 }, { "epoch": 0.67, "learning_rate": 0.0003382282137698604, "loss": 0.0669, "theoretical_loss": 3.4326011985399147, "tokens_seen": 2194931712 }, { "epoch": 0.67, "learning_rate": 0.0003381479698282779, "loss": 0.068, "theoretical_loss": 3.4325679195171155, "tokens_seen": 2195193856 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.00028255730285309255, "objective/train/docs_used": 799213, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3589651584625244, "objective/train/original_loss": 1.3589650392532349, "objective/train/theoretical_loss": 3.4325346455807613, "objective/train/tokens_used": 2215916000, "objective/train/value_avg": -0.00843048095703125, "objective/train/value_loss": 0.00023773591965436935, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.79150390625, "objective/train/value_reward_corr": 0.7414294469085657, "objective/train/value_std": 0.0166168212890625, "objective/train/weight_avg": 1.0003902912139893, "objective/train/weighted_lm_loss": 1.3589110374450684, "objective/train/weights_max": 1.6519445180892944, "objective/train/weights_min": 0.23257870972156525, "theoretical_loss": 3.4325346455807613, "tokens_seen": 2195456000 }, { "epoch": 0.67, "learning_rate": 0.00033806772588669553, "loss": 0.0643, "theoretical_loss": 3.4325346455807613, "tokens_seen": 2195456000 }, { "epoch": 0.67, "learning_rate": 0.00033798748194511315, "loss": 0.0677, "theoretical_loss": 3.4325013767294683, "tokens_seen": 2195718144 }, { "epoch": 0.67, "learning_rate": 0.00033790723800353076, "loss": 0.0686, "theoretical_loss": 3.4324681129618515, "tokens_seen": 2195980288 }, { "epoch": 0.67, "learning_rate": 0.0003378269940619483, "loss": 0.0668, "theoretical_loss": 3.4324348542765284, "tokens_seen": 2196242432 }, { "epoch": 0.67, "learning_rate": 0.00033774675012036593, "loss": 0.0699, "theoretical_loss": 3.432401600672115, "tokens_seen": 2196504576 }, { "epoch": 0.67, "learning_rate": 0.00033766650617878354, "loss": 0.0671, "theoretical_loss": 3.43236835214723, "tokens_seen": 2196766720 }, { "epoch": 0.67, "learning_rate": 0.0003375862622372011, "loss": 0.0664, "theoretical_loss": 3.4323351087004905, "tokens_seen": 2197028864 }, { "epoch": 0.67, "learning_rate": 0.00033750601829561866, "loss": 0.0645, "theoretical_loss": 3.4323018703305155, "tokens_seen": 2197291008 }, { "epoch": 0.67, "learning_rate": 0.00033742577435403627, "loss": 0.0656, "theoretical_loss": 3.432268637035924, "tokens_seen": 2197553152 }, { "epoch": 0.67, "learning_rate": 0.0003373455304124539, "loss": 0.0684, "theoretical_loss": 3.4322354088153357, "tokens_seen": 2197815296 }, { "epoch": 0.67, "learning_rate": 0.00033726528647087144, "loss": 0.0679, "theoretical_loss": 3.432202185667371, "tokens_seen": 2198077440 }, { "epoch": 0.67, "learning_rate": 0.00033718504252928906, "loss": 0.0668, "theoretical_loss": 3.432168967590651, "tokens_seen": 2198339584 }, { "epoch": 0.67, "learning_rate": 0.00033710479858770667, "loss": 0.0674, "theoretical_loss": 3.432135754583796, "tokens_seen": 2198601728 }, { "epoch": 0.67, "learning_rate": 0.0003370245546461242, "loss": 0.0672, "theoretical_loss": 3.432102546645429, "tokens_seen": 2198863872 }, { "epoch": 0.67, "learning_rate": 0.0003369443107045418, "loss": 0.0678, "theoretical_loss": 3.432069343774172, "tokens_seen": 2199126016 }, { "epoch": 0.67, "learning_rate": 0.0003368640667629594, "loss": 0.068, "theoretical_loss": 3.432036145968648, "tokens_seen": 2199388160 }, { "epoch": 0.67, "learning_rate": 0.000336783822821377, "loss": 0.0682, "theoretical_loss": 3.43200295322748, "tokens_seen": 2199650304 }, { "epoch": 0.67, "learning_rate": 0.00033670357887979457, "loss": 0.0646, "theoretical_loss": 3.431969765549293, "tokens_seen": 2199912448 }, { "epoch": 0.67, "learning_rate": 0.0003366233349382122, "loss": 0.0661, "theoretical_loss": 3.43193658293271, "tokens_seen": 2200174592 }, { "epoch": 0.67, "learning_rate": 0.0003365430909966298, "loss": 0.068, "theoretical_loss": 3.4319034053763575, "tokens_seen": 2200436736 }, { "epoch": 0.67, "learning_rate": 0.00033646284705504735, "loss": 0.0656, "theoretical_loss": 3.431870232878861, "tokens_seen": 2200698880 }, { "epoch": 0.67, "learning_rate": 0.0003363826031134649, "loss": 0.0671, "theoretical_loss": 3.431837065438846, "tokens_seen": 2200961024 }, { "epoch": 0.67, "learning_rate": 0.0003363023591718825, "loss": 0.0682, "theoretical_loss": 3.4318039030549397, "tokens_seen": 2201223168 }, { "epoch": 0.67, "learning_rate": 0.0003362221152303001, "loss": 0.0663, "theoretical_loss": 3.431770745725769, "tokens_seen": 2201485312 }, { "epoch": 0.67, "learning_rate": 0.0003361418712887177, "loss": 0.0642, "theoretical_loss": 3.431737593449962, "tokens_seen": 2201747456 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.000496376771479845, "objective/train/docs_used": 801578, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3730624914169312, "objective/train/original_loss": 1.3730623722076416, "objective/train/theoretical_loss": 3.4317044462261466, "objective/train/tokens_used": 2222469600, "objective/train/value_avg": -0.00689697265625, "objective/train/value_loss": 0.00029629369964823127, "objective/train/value_max": -3.3974647521972656e-05, "objective/train/value_min": -0.7666015625, "objective/train/value_reward_corr": 0.737004677943326, "objective/train/value_std": 0.0198822021484375, "objective/train/weight_avg": 1.0006368160247803, "objective/train/weighted_lm_loss": 1.373940110206604, "objective/train/weights_max": 2.072002649307251, "objective/train/weights_min": 0.3690805733203888, "theoretical_loss": 3.4317044462261466, "tokens_seen": 2202009600 }, { "epoch": 0.67, "learning_rate": 0.0003360616273471353, "loss": 0.0654, "theoretical_loss": 3.4317044462261466, "tokens_seen": 2202009600 }, { "epoch": 0.67, "learning_rate": 0.0003359813834055529, "loss": 0.0665, "theoretical_loss": 3.4316713040529523, "tokens_seen": 2202271744 }, { "epoch": 0.67, "learning_rate": 0.0003359011394639705, "loss": 0.0672, "theoretical_loss": 3.4316381669290084, "tokens_seen": 2202533888 }, { "epoch": 0.67, "learning_rate": 0.0003358208955223881, "loss": 0.0655, "theoretical_loss": 3.431605034852944, "tokens_seen": 2202796032 }, { "epoch": 0.67, "learning_rate": 0.00033574065158080565, "loss": 0.0703, "theoretical_loss": 3.43157190782339, "tokens_seen": 2203058176 }, { "epoch": 0.67, "learning_rate": 0.0003356604076392232, "loss": 0.065, "theoretical_loss": 3.431538785838978, "tokens_seen": 2203320320 }, { "epoch": 0.67, "learning_rate": 0.0003355801636976408, "loss": 0.0675, "theoretical_loss": 3.4315056688983385, "tokens_seen": 2203582464 }, { "epoch": 0.67, "learning_rate": 0.00033549991975605843, "loss": 0.0687, "theoretical_loss": 3.4314725570001046, "tokens_seen": 2203844608 }, { "epoch": 0.67, "learning_rate": 0.00033541967581447604, "loss": 0.066, "theoretical_loss": 3.431439450142908, "tokens_seen": 2204106752 }, { "epoch": 0.67, "learning_rate": 0.0003353394318728936, "loss": 0.0692, "theoretical_loss": 3.4314063483253823, "tokens_seen": 2204368896 }, { "epoch": 0.67, "learning_rate": 0.0003352591879313112, "loss": 0.0656, "theoretical_loss": 3.431373251546161, "tokens_seen": 2204631040 }, { "epoch": 0.67, "learning_rate": 0.0003351789439897288, "loss": 0.069, "theoretical_loss": 3.431340159803878, "tokens_seen": 2204893184 }, { "epoch": 0.67, "learning_rate": 0.00033509870004814633, "loss": 0.0666, "theoretical_loss": 3.4313070730971686, "tokens_seen": 2205155328 }, { "epoch": 0.67, "learning_rate": 0.00033501845610656395, "loss": 0.0691, "theoretical_loss": 3.431273991424668, "tokens_seen": 2205417472 }, { "epoch": 0.67, "learning_rate": 0.00033493821216498156, "loss": 0.0681, "theoretical_loss": 3.431240914785012, "tokens_seen": 2205679616 }, { "epoch": 0.67, "learning_rate": 0.00033485796822339917, "loss": 0.0654, "theoretical_loss": 3.431207843176836, "tokens_seen": 2205941760 }, { "epoch": 0.67, "learning_rate": 0.00033477772428181673, "loss": 0.0678, "theoretical_loss": 3.431174776598778, "tokens_seen": 2206203904 }, { "epoch": 0.67, "learning_rate": 0.00033469748034023434, "loss": 0.0675, "theoretical_loss": 3.431141715049475, "tokens_seen": 2206466048 }, { "epoch": 0.67, "learning_rate": 0.00033461723639865195, "loss": 0.0672, "theoretical_loss": 3.4311086585275645, "tokens_seen": 2206728192 }, { "epoch": 0.67, "learning_rate": 0.00033453699245706946, "loss": 0.0656, "theoretical_loss": 3.4310756070316857, "tokens_seen": 2206990336 }, { "epoch": 0.67, "learning_rate": 0.00033445674851548707, "loss": 0.0673, "theoretical_loss": 3.431042560560477, "tokens_seen": 2207252480 }, { "epoch": 0.67, "learning_rate": 0.0003343765045739047, "loss": 0.0689, "theoretical_loss": 3.431009519112578, "tokens_seen": 2207514624 }, { "epoch": 0.67, "learning_rate": 0.00033429626063232224, "loss": 0.0684, "theoretical_loss": 3.4309764826866287, "tokens_seen": 2207776768 }, { "epoch": 0.67, "learning_rate": 0.00033421601669073985, "loss": 0.0675, "theoretical_loss": 3.43094345128127, "tokens_seen": 2208038912 }, { "epoch": 0.67, "learning_rate": 0.00033413577274915747, "loss": 0.0699, "theoretical_loss": 3.430910424895143, "tokens_seen": 2208301056 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.0003951071994379163, "objective/train/docs_used": 804023, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3737064599990845, "objective/train/original_loss": 1.373706579208374, "objective/train/theoretical_loss": 3.4308774035268885, "objective/train/tokens_used": 2229023200, "objective/train/value_avg": -0.00640869140625, "objective/train/value_loss": 0.00019101255747955292, "objective/train/value_max": -4.9114227294921875e-05, "objective/train/value_min": -0.58740234375, "objective/train/value_reward_corr": 0.6632108587219194, "objective/train/value_std": 0.012176513671875, "objective/train/weight_avg": 1.0004802942276, "objective/train/weighted_lm_loss": 1.3744759559631348, "objective/train/weights_max": 1.470705509185791, "objective/train/weights_min": 0.36826980113983154, "theoretical_loss": 3.4308774035268885, "tokens_seen": 2208563200 }, { "epoch": 0.67, "learning_rate": 0.0003340555288075751, "loss": 0.0686, "theoretical_loss": 3.4308774035268885, "tokens_seen": 2208563200 }, { "epoch": 0.67, "learning_rate": 0.0003339752848659926, "loss": 0.0676, "theoretical_loss": 3.4308443871751497, "tokens_seen": 2208825344 }, { "epoch": 0.67, "learning_rate": 0.0003338950409244102, "loss": 0.0644, "theoretical_loss": 3.4308113758385685, "tokens_seen": 2209087488 }, { "epoch": 0.67, "learning_rate": 0.0003338147969828278, "loss": 0.0685, "theoretical_loss": 3.4307783695157887, "tokens_seen": 2209349632 }, { "epoch": 0.67, "learning_rate": 0.00033373455304124537, "loss": 0.0675, "theoretical_loss": 3.4307453682054536, "tokens_seen": 2209611776 }, { "epoch": 0.67, "learning_rate": 0.000333654309099663, "loss": 0.0674, "theoretical_loss": 3.430712371906208, "tokens_seen": 2209873920 }, { "epoch": 0.67, "learning_rate": 0.0003335740651580806, "loss": 0.0674, "theoretical_loss": 3.4306793806166955, "tokens_seen": 2210136064 }, { "epoch": 0.67, "learning_rate": 0.0003334938212164982, "loss": 0.0662, "theoretical_loss": 3.4306463943355627, "tokens_seen": 2210398208 }, { "epoch": 0.67, "learning_rate": 0.0003334135772749157, "loss": 0.0672, "theoretical_loss": 3.430613413061455, "tokens_seen": 2210660352 }, { "epoch": 0.67, "learning_rate": 0.0003333333333333333, "loss": 0.0673, "theoretical_loss": 3.4305804367930186, "tokens_seen": 2210922496 }, { "epoch": 0.67, "learning_rate": 0.00033325308939175093, "loss": 0.0639, "theoretical_loss": 3.4305474655289006, "tokens_seen": 2211184640 }, { "epoch": 0.67, "learning_rate": 0.0003331728454501685, "loss": 0.0678, "theoretical_loss": 3.4305144992677485, "tokens_seen": 2211446784 }, { "epoch": 0.67, "learning_rate": 0.0003330926015085861, "loss": 0.066, "theoretical_loss": 3.4304815380082103, "tokens_seen": 2211708928 }, { "epoch": 0.67, "learning_rate": 0.0003330123575670037, "loss": 0.065, "theoretical_loss": 3.430448581748934, "tokens_seen": 2211971072 }, { "epoch": 0.67, "learning_rate": 0.00033293211362542133, "loss": 0.0692, "theoretical_loss": 3.430415630488569, "tokens_seen": 2212233216 }, { "epoch": 0.67, "learning_rate": 0.0003328518696838389, "loss": 0.0686, "theoretical_loss": 3.4303826842257648, "tokens_seen": 2212495360 }, { "epoch": 0.67, "learning_rate": 0.00033277162574225645, "loss": 0.0676, "theoretical_loss": 3.430349742959171, "tokens_seen": 2212757504 }, { "epoch": 0.67, "learning_rate": 0.00033269138180067406, "loss": 0.0657, "theoretical_loss": 3.4303168066874385, "tokens_seen": 2213019648 }, { "epoch": 0.67, "learning_rate": 0.0003326111378590916, "loss": 0.067, "theoretical_loss": 3.430283875409219, "tokens_seen": 2213281792 }, { "epoch": 0.67, "learning_rate": 0.00033253089391750923, "loss": 0.065, "theoretical_loss": 3.430250949123163, "tokens_seen": 2213543936 }, { "epoch": 0.67, "learning_rate": 0.00033245064997592684, "loss": 0.0669, "theoretical_loss": 3.430218027827924, "tokens_seen": 2213806080 }, { "epoch": 0.67, "learning_rate": 0.00033237040603434446, "loss": 0.068, "theoretical_loss": 3.430185111522153, "tokens_seen": 2214068224 }, { "epoch": 0.67, "learning_rate": 0.000332290162092762, "loss": 0.0642, "theoretical_loss": 3.4301522002045046, "tokens_seen": 2214330368 }, { "epoch": 0.67, "learning_rate": 0.0003322099181511796, "loss": 0.0692, "theoretical_loss": 3.430119293873632, "tokens_seen": 2214592512 }, { "epoch": 0.67, "learning_rate": 0.0003321296742095972, "loss": 0.067, "theoretical_loss": 3.4300863925281893, "tokens_seen": 2214854656 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.000551328994333744, "objective/train/docs_used": 806281, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2843917608261108, "objective/train/original_loss": 1.2843916416168213, "objective/train/theoretical_loss": 3.430053496166831, "objective/train/tokens_used": 2235576800, "objective/train/value_avg": -0.00879669189453125, "objective/train/value_loss": 0.0003198671620339155, "objective/train/value_max": -2.372264862060547e-05, "objective/train/value_min": -0.97216796875, "objective/train/value_reward_corr": 0.741845592361158, "objective/train/value_std": 0.0195159912109375, "objective/train/weight_avg": 1.0007034540176392, "objective/train/weighted_lm_loss": 1.2850383520126343, "objective/train/weights_max": 1.8766188621520996, "objective/train/weights_min": 0.38213738799095154, "theoretical_loss": 3.430053496166831, "tokens_seen": 2215116800 }, { "epoch": 0.67, "learning_rate": 0.00033204943026801474, "loss": 0.0662, "theoretical_loss": 3.430053496166831, "tokens_seen": 2215116800 }, { "epoch": 0.67, "learning_rate": 0.00033196918632643236, "loss": 0.0668, "theoretical_loss": 3.4300206047882136, "tokens_seen": 2215378944 }, { "epoch": 0.67, "learning_rate": 0.00033188894238484997, "loss": 0.0678, "theoretical_loss": 3.4299877183909917, "tokens_seen": 2215641088 }, { "epoch": 0.67, "learning_rate": 0.00033180869844326753, "loss": 0.0665, "theoretical_loss": 3.4299548369738218, "tokens_seen": 2215903232 }, { "epoch": 0.67, "learning_rate": 0.00033172845450168514, "loss": 0.0648, "theoretical_loss": 3.429921960535361, "tokens_seen": 2216165376 }, { "epoch": 0.67, "learning_rate": 0.0003316482105601027, "loss": 0.0667, "theoretical_loss": 3.429889089074267, "tokens_seen": 2216427520 }, { "epoch": 0.67, "learning_rate": 0.0003315679666185203, "loss": 0.0636, "theoretical_loss": 3.429856222589197, "tokens_seen": 2216689664 }, { "epoch": 0.67, "learning_rate": 0.00033148772267693787, "loss": 0.0673, "theoretical_loss": 3.4298233610788094, "tokens_seen": 2216951808 }, { "epoch": 0.67, "learning_rate": 0.0003314074787353555, "loss": 0.067, "theoretical_loss": 3.429790504541764, "tokens_seen": 2217213952 }, { "epoch": 0.67, "learning_rate": 0.0003313272347937731, "loss": 0.066, "theoretical_loss": 3.4297576529767193, "tokens_seen": 2217476096 }, { "epoch": 0.67, "learning_rate": 0.00033124699085219065, "loss": 0.0671, "theoretical_loss": 3.4297248063823362, "tokens_seen": 2217738240 }, { "epoch": 0.67, "learning_rate": 0.00033116674691060827, "loss": 0.066, "theoretical_loss": 3.4296919647572746, "tokens_seen": 2218000384 }, { "epoch": 0.67, "learning_rate": 0.0003310865029690259, "loss": 0.067, "theoretical_loss": 3.4296591281001954, "tokens_seen": 2218262528 }, { "epoch": 0.67, "learning_rate": 0.00033100625902744344, "loss": 0.0643, "theoretical_loss": 3.42962629640976, "tokens_seen": 2218524672 }, { "epoch": 0.67, "learning_rate": 0.000330926015085861, "loss": 0.0682, "theoretical_loss": 3.4295934696846313, "tokens_seen": 2218786816 }, { "epoch": 0.67, "learning_rate": 0.0003308457711442786, "loss": 0.0642, "theoretical_loss": 3.4295606479234713, "tokens_seen": 2219048960 }, { "epoch": 0.67, "learning_rate": 0.0003307655272026962, "loss": 0.0661, "theoretical_loss": 3.429527831124943, "tokens_seen": 2219311104 }, { "epoch": 0.67, "learning_rate": 0.0003306852832611138, "loss": 0.0673, "theoretical_loss": 3.42949501928771, "tokens_seen": 2219573248 }, { "epoch": 0.67, "learning_rate": 0.0003306050393195314, "loss": 0.0649, "theoretical_loss": 3.4294622124104364, "tokens_seen": 2219835392 }, { "epoch": 0.67, "learning_rate": 0.000330524795377949, "loss": 0.064, "theoretical_loss": 3.429429410491787, "tokens_seen": 2220097536 }, { "epoch": 0.67, "learning_rate": 0.00033044455143636656, "loss": 0.066, "theoretical_loss": 3.429396613530427, "tokens_seen": 2220359680 }, { "epoch": 0.67, "learning_rate": 0.0003303643074947841, "loss": 0.0668, "theoretical_loss": 3.429363821525022, "tokens_seen": 2220621824 }, { "epoch": 0.67, "learning_rate": 0.00033028406355320173, "loss": 0.0667, "theoretical_loss": 3.4293310344742385, "tokens_seen": 2220883968 }, { "epoch": 0.67, "learning_rate": 0.00033020381961161935, "loss": 0.065, "theoretical_loss": 3.429298252376743, "tokens_seen": 2221146112 }, { "epoch": 0.67, "learning_rate": 0.0003301235756700369, "loss": 0.0638, "theoretical_loss": 3.429265475231202, "tokens_seen": 2221408256 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.0007514514145441353, "objective/train/docs_used": 808517, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2400312423706055, "objective/train/original_loss": 1.2400312423706055, "objective/train/theoretical_loss": 3.429232703036284, "objective/train/tokens_used": 2242130400, "objective/train/value_avg": -0.00983428955078125, "objective/train/value_loss": 0.0003172192955389619, "objective/train/value_max": -7.724761962890625e-05, "objective/train/value_min": -0.80078125, "objective/train/value_reward_corr": 0.7819778708574383, "objective/train/value_std": 0.0235595703125, "objective/train/weight_avg": 1.000901460647583, "objective/train/weighted_lm_loss": 1.2409225702285767, "objective/train/weights_max": 1.667690396308899, "objective/train/weights_min": 0.41253384947776794, "theoretical_loss": 3.429232703036284, "tokens_seen": 2221670400 }, { "epoch": 0.67, "learning_rate": 0.0003300433317284545, "loss": 0.0651, "theoretical_loss": 3.429232703036284, "tokens_seen": 2221670400 }, { "epoch": 0.67, "learning_rate": 0.00032996308778687213, "loss": 0.067, "theoretical_loss": 3.429199935790657, "tokens_seen": 2221932544 }, { "epoch": 0.67, "learning_rate": 0.0003298828438452897, "loss": 0.0689, "theoretical_loss": 3.4291671734929903, "tokens_seen": 2222194688 }, { "epoch": 0.67, "learning_rate": 0.00032980259990370725, "loss": 0.0683, "theoretical_loss": 3.4291344161419524, "tokens_seen": 2222456832 }, { "epoch": 0.67, "learning_rate": 0.00032972235596212486, "loss": 0.066, "theoretical_loss": 3.4291016637362137, "tokens_seen": 2222718976 }, { "epoch": 0.67, "learning_rate": 0.00032964211202054247, "loss": 0.0657, "theoretical_loss": 3.429068916274444, "tokens_seen": 2222981120 }, { "epoch": 0.67, "learning_rate": 0.00032956186807896003, "loss": 0.0654, "theoretical_loss": 3.429036173755314, "tokens_seen": 2223243264 }, { "epoch": 0.67, "learning_rate": 0.00032948162413737764, "loss": 0.0693, "theoretical_loss": 3.429003436177496, "tokens_seen": 2223505408 }, { "epoch": 0.67, "learning_rate": 0.00032940138019579526, "loss": 0.067, "theoretical_loss": 3.4289707035396613, "tokens_seen": 2223767552 }, { "epoch": 0.67, "learning_rate": 0.0003293211362542128, "loss": 0.0697, "theoretical_loss": 3.428937975840482, "tokens_seen": 2224029696 }, { "epoch": 0.67, "learning_rate": 0.00032924089231263037, "loss": 0.0686, "theoretical_loss": 3.4289052530786313, "tokens_seen": 2224291840 }, { "epoch": 0.67, "learning_rate": 0.000329160648371048, "loss": 0.068, "theoretical_loss": 3.4288725352527827, "tokens_seen": 2224553984 }, { "epoch": 0.67, "learning_rate": 0.0003290804044294656, "loss": 0.0661, "theoretical_loss": 3.42883982236161, "tokens_seen": 2224816128 }, { "epoch": 0.67, "learning_rate": 0.00032900016048788316, "loss": 0.0649, "theoretical_loss": 3.428807114403787, "tokens_seen": 2225078272 }, { "epoch": 0.67, "learning_rate": 0.00032891991654630077, "loss": 0.0627, "theoretical_loss": 3.428774411377989, "tokens_seen": 2225340416 }, { "epoch": 0.67, "learning_rate": 0.0003288396726047184, "loss": 0.0677, "theoretical_loss": 3.4287417132828923, "tokens_seen": 2225602560 }, { "epoch": 0.67, "learning_rate": 0.00032875942866313594, "loss": 0.0656, "theoretical_loss": 3.428709020117172, "tokens_seen": 2225864704 }, { "epoch": 0.67, "learning_rate": 0.0003286791847215535, "loss": 0.0646, "theoretical_loss": 3.4286763318795046, "tokens_seen": 2226126848 }, { "epoch": 0.67, "learning_rate": 0.0003285989407799711, "loss": 0.064, "theoretical_loss": 3.428643648568567, "tokens_seen": 2226388992 }, { "epoch": 0.67, "learning_rate": 0.0003285186968383887, "loss": 0.0684, "theoretical_loss": 3.4286109701830374, "tokens_seen": 2226651136 }, { "epoch": 0.67, "learning_rate": 0.0003284384528968063, "loss": 0.0678, "theoretical_loss": 3.428578296721593, "tokens_seen": 2226913280 }, { "epoch": 0.67, "learning_rate": 0.0003283582089552239, "loss": 0.0669, "theoretical_loss": 3.4285456281829125, "tokens_seen": 2227175424 }, { "epoch": 0.68, "learning_rate": 0.0003282779650136415, "loss": 0.0694, "theoretical_loss": 3.428512964565675, "tokens_seen": 2227437568 }, { "epoch": 0.68, "learning_rate": 0.00032819772107205907, "loss": 0.0664, "theoretical_loss": 3.4284803058685602, "tokens_seen": 2227699712 }, { "epoch": 0.68, "learning_rate": 0.0003281174771304767, "loss": 0.0659, "theoretical_loss": 3.428447652090248, "tokens_seen": 2227961856 }, { "epoch": 0.68, "objective/train/advantage_avg": -0.0001387705997331068, "objective/train/docs_used": 811140, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2117676734924316, "objective/train/original_loss": 1.2117676734924316, "objective/train/theoretical_loss": 3.4284150032294187, "objective/train/tokens_used": 2248684000, "objective/train/value_avg": -0.009246826171875, "objective/train/value_loss": 0.0001913748710649088, "objective/train/value_max": -3.5643577575683594e-05, "objective/train/value_min": -0.2802734375, "objective/train/value_reward_corr": 0.834702300274055, "objective/train/value_std": 0.0182647705078125, "objective/train/weight_avg": 0.999951958656311, "objective/train/weighted_lm_loss": 1.212104320526123, "objective/train/weights_max": 1.2279880046844482, "objective/train/weights_min": 0.372374951839447, "theoretical_loss": 3.4284150032294187, "tokens_seen": 2228224000 }, { "epoch": 0.68, "learning_rate": 0.00032803723318889424, "loss": 0.0672, "theoretical_loss": 3.4284150032294187, "tokens_seen": 2228224000 }, { "epoch": 0.68, "learning_rate": 0.0003279569892473118, "loss": 0.0666, "theoretical_loss": 3.428382359284754, "tokens_seen": 2228486144 }, { "epoch": 0.68, "learning_rate": 0.0003278767453057294, "loss": 0.0673, "theoretical_loss": 3.4283497202549347, "tokens_seen": 2228748288 }, { "epoch": 0.68, "learning_rate": 0.000327796501364147, "loss": 0.0689, "theoretical_loss": 3.4283170861386436, "tokens_seen": 2229010432 }, { "epoch": 0.68, "learning_rate": 0.00032771625742256463, "loss": 0.0684, "theoretical_loss": 3.4282844569345623, "tokens_seen": 2229272576 }, { "epoch": 0.68, "learning_rate": 0.0003276360134809822, "loss": 0.0675, "theoretical_loss": 3.428251832641375, "tokens_seen": 2229534720 }, { "epoch": 0.68, "learning_rate": 0.0003275557695393998, "loss": 0.0685, "theoretical_loss": 3.4282192132577647, "tokens_seen": 2229796864 }, { "epoch": 0.68, "learning_rate": 0.00032747552559781736, "loss": 0.0666, "theoretical_loss": 3.4281865987824154, "tokens_seen": 2230059008 }, { "epoch": 0.68, "learning_rate": 0.0003273952816562349, "loss": 0.0661, "theoretical_loss": 3.428153989214012, "tokens_seen": 2230321152 }, { "epoch": 0.68, "learning_rate": 0.00032731503771465253, "loss": 0.0655, "theoretical_loss": 3.4281213845512397, "tokens_seen": 2230583296 }, { "epoch": 0.68, "learning_rate": 0.00032723479377307015, "loss": 0.0643, "theoretical_loss": 3.428088784792784, "tokens_seen": 2230845440 }, { "epoch": 0.68, "learning_rate": 0.00032715454983148776, "loss": 0.0696, "theoretical_loss": 3.4280561899373305, "tokens_seen": 2231107584 }, { "epoch": 0.68, "learning_rate": 0.0003270743058899053, "loss": 0.0672, "theoretical_loss": 3.428023599983567, "tokens_seen": 2231369728 }, { "epoch": 0.68, "learning_rate": 0.00032699406194832293, "loss": 0.0675, "theoretical_loss": 3.4279910149301798, "tokens_seen": 2231631872 }, { "epoch": 0.68, "learning_rate": 0.0003269138180067405, "loss": 0.0674, "theoretical_loss": 3.4279584347758565, "tokens_seen": 2231894016 }, { "epoch": 0.68, "learning_rate": 0.00032683357406515805, "loss": 0.065, "theoretical_loss": 3.4279258595192856, "tokens_seen": 2232156160 }, { "epoch": 0.68, "learning_rate": 0.00032675333012357566, "loss": 0.0649, "theoretical_loss": 3.427893289159156, "tokens_seen": 2232418304 }, { "epoch": 0.68, "learning_rate": 0.00032667308618199327, "loss": 0.0672, "theoretical_loss": 3.427860723694156, "tokens_seen": 2232680448 }, { "epoch": 0.68, "learning_rate": 0.0003265928422404109, "loss": 0.0677, "theoretical_loss": 3.427828163122976, "tokens_seen": 2232942592 }, { "epoch": 0.68, "learning_rate": 0.00032651259829882844, "loss": 0.067, "theoretical_loss": 3.427795607444306, "tokens_seen": 2233204736 }, { "epoch": 0.68, "learning_rate": 0.00032643235435724606, "loss": 0.0689, "theoretical_loss": 3.4277630566568367, "tokens_seen": 2233466880 }, { "epoch": 0.68, "learning_rate": 0.00032635211041566367, "loss": 0.0658, "theoretical_loss": 3.4277305107592593, "tokens_seen": 2233729024 }, { "epoch": 0.68, "learning_rate": 0.00032627186647408117, "loss": 0.0682, "theoretical_loss": 3.427697969750265, "tokens_seen": 2233991168 }, { "epoch": 0.68, "learning_rate": 0.0003261916225324988, "loss": 0.0684, "theoretical_loss": 3.427665433628547, "tokens_seen": 2234253312 }, { "epoch": 0.68, "learning_rate": 0.0003261113785909164, "loss": 0.0679, "theoretical_loss": 3.427632902392797, "tokens_seen": 2234515456 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.0003541047917678952, "objective/train/docs_used": 813661, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.345414400100708, "objective/train/original_loss": 1.345414400100708, "objective/train/theoretical_loss": 3.427600376041709, "objective/train/tokens_used": 2255237600, "objective/train/value_avg": -0.006504058837890625, "objective/train/value_loss": 0.00015749734302517027, "objective/train/value_max": -1.8656253814697266e-05, "objective/train/value_min": -0.7744140625, "objective/train/value_reward_corr": 0.7244407312411731, "objective/train/value_std": 0.0146942138671875, "objective/train/weight_avg": 1.000430703163147, "objective/train/weighted_lm_loss": 1.3458590507507324, "objective/train/weights_max": 1.8365892171859741, "objective/train/weights_min": 0.39913442730903625, "theoretical_loss": 3.427600376041709, "tokens_seen": 2234777600 }, { "epoch": 0.68, "learning_rate": 0.00032603113464933396, "loss": 0.0672, "theoretical_loss": 3.427600376041709, "tokens_seen": 2234777600 }, { "epoch": 0.68, "learning_rate": 0.00032595089070775157, "loss": 0.0647, "theoretical_loss": 3.4275678545739763, "tokens_seen": 2235039744 }, { "epoch": 0.68, "learning_rate": 0.0003258706467661692, "loss": 0.067, "theoretical_loss": 3.4275353379882927, "tokens_seen": 2235301888 }, { "epoch": 0.68, "learning_rate": 0.0003257904028245868, "loss": 0.0669, "theoretical_loss": 3.427502826283354, "tokens_seen": 2235564032 }, { "epoch": 0.68, "learning_rate": 0.0003257101588830043, "loss": 0.0668, "theoretical_loss": 3.427470319457854, "tokens_seen": 2235826176 }, { "epoch": 0.68, "learning_rate": 0.0003256299149414219, "loss": 0.0685, "theoretical_loss": 3.4274378175104894, "tokens_seen": 2236088320 }, { "epoch": 0.68, "learning_rate": 0.0003255496709998395, "loss": 0.0658, "theoretical_loss": 3.427405320439956, "tokens_seen": 2236350464 }, { "epoch": 0.68, "learning_rate": 0.0003254694270582571, "loss": 0.0656, "theoretical_loss": 3.4273728282449514, "tokens_seen": 2236612608 }, { "epoch": 0.68, "learning_rate": 0.0003253891831166747, "loss": 0.066, "theoretical_loss": 3.4273403409241716, "tokens_seen": 2236874752 }, { "epoch": 0.68, "learning_rate": 0.0003253089391750923, "loss": 0.0667, "theoretical_loss": 3.427307858476315, "tokens_seen": 2237136896 }, { "epoch": 0.68, "learning_rate": 0.0003252286952335099, "loss": 0.0689, "theoretical_loss": 3.4272753809000793, "tokens_seen": 2237399040 }, { "epoch": 0.68, "learning_rate": 0.0003251484512919275, "loss": 0.0675, "theoretical_loss": 3.4272429081941636, "tokens_seen": 2237661184 }, { "epoch": 0.68, "learning_rate": 0.00032506820735034504, "loss": 0.0693, "theoretical_loss": 3.4272104403572667, "tokens_seen": 2237923328 }, { "epoch": 0.68, "learning_rate": 0.00032498796340876265, "loss": 0.0667, "theoretical_loss": 3.4271779773880895, "tokens_seen": 2238185472 }, { "epoch": 0.68, "learning_rate": 0.0003249077194671802, "loss": 0.0657, "theoretical_loss": 3.427145519285331, "tokens_seen": 2238447616 }, { "epoch": 0.68, "learning_rate": 0.0003248274755255978, "loss": 0.0693, "theoretical_loss": 3.427113066047692, "tokens_seen": 2238709760 }, { "epoch": 0.68, "learning_rate": 0.00032474723158401543, "loss": 0.0687, "theoretical_loss": 3.4270806176738744, "tokens_seen": 2238971904 }, { "epoch": 0.68, "learning_rate": 0.00032466698764243304, "loss": 0.067, "theoretical_loss": 3.4270481741625796, "tokens_seen": 2239234048 }, { "epoch": 0.68, "learning_rate": 0.0003245867437008506, "loss": 0.0677, "theoretical_loss": 3.4270157355125095, "tokens_seen": 2239496192 }, { "epoch": 0.68, "learning_rate": 0.00032450649975926816, "loss": 0.0687, "theoretical_loss": 3.426983301722367, "tokens_seen": 2239758336 }, { "epoch": 0.68, "learning_rate": 0.0003244262558176858, "loss": 0.0668, "theoretical_loss": 3.4269508727908553, "tokens_seen": 2240020480 }, { "epoch": 0.68, "learning_rate": 0.00032434601187610333, "loss": 0.0667, "theoretical_loss": 3.426918448716678, "tokens_seen": 2240282624 }, { "epoch": 0.68, "learning_rate": 0.00032426576793452095, "loss": 0.068, "theoretical_loss": 3.42688602949854, "tokens_seen": 2240544768 }, { "epoch": 0.68, "learning_rate": 0.00032418552399293856, "loss": 0.064, "theoretical_loss": 3.426853615135145, "tokens_seen": 2240806912 }, { "epoch": 0.68, "learning_rate": 0.00032410528005135617, "loss": 0.0663, "theoretical_loss": 3.426821205625199, "tokens_seen": 2241069056 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.0005054355133324862, "objective/train/docs_used": 816099, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2898792028427124, "objective/train/original_loss": 1.2898790836334229, "objective/train/theoretical_loss": 3.4267888009674072, "objective/train/tokens_used": 2261791200, "objective/train/value_avg": -0.00934600830078125, "objective/train/value_loss": 0.000255974184256047, "objective/train/value_max": -1.0728836059570312e-05, "objective/train/value_min": -0.72900390625, "objective/train/value_reward_corr": 0.7769306544821992, "objective/train/value_std": 0.0211029052734375, "objective/train/weight_avg": 1.0006273984909058, "objective/train/weighted_lm_loss": 1.2898268699645996, "objective/train/weights_max": 1.6812392473220825, "objective/train/weights_min": 0.3951832354068756, "theoretical_loss": 3.4267888009674072, "tokens_seen": 2241331200 }, { "epoch": 0.68, "learning_rate": 0.00032402503610977373, "loss": 0.0653, "theoretical_loss": 3.4267888009674072, "tokens_seen": 2241331200 }, { "epoch": 0.68, "learning_rate": 0.0003239447921681913, "loss": 0.0664, "theoretical_loss": 3.4267564011604756, "tokens_seen": 2241593344 }, { "epoch": 0.68, "learning_rate": 0.0003238645482266089, "loss": 0.0651, "theoretical_loss": 3.426724006203112, "tokens_seen": 2241855488 }, { "epoch": 0.68, "learning_rate": 0.00032378430428502646, "loss": 0.0647, "theoretical_loss": 3.426691616094022, "tokens_seen": 2242117632 }, { "epoch": 0.68, "learning_rate": 0.00032370406034344407, "loss": 0.0679, "theoretical_loss": 3.4266592308319144, "tokens_seen": 2242379776 }, { "epoch": 0.68, "learning_rate": 0.0003236238164018617, "loss": 0.067, "theoretical_loss": 3.426626850415497, "tokens_seen": 2242641920 }, { "epoch": 0.68, "learning_rate": 0.00032354357246027924, "loss": 0.0662, "theoretical_loss": 3.4265944748434785, "tokens_seen": 2242904064 }, { "epoch": 0.68, "learning_rate": 0.00032346332851869685, "loss": 0.0662, "theoretical_loss": 3.4265621041145677, "tokens_seen": 2243166208 }, { "epoch": 0.68, "learning_rate": 0.00032338308457711447, "loss": 0.0642, "theoretical_loss": 3.426529738227475, "tokens_seen": 2243428352 }, { "epoch": 0.68, "learning_rate": 0.000323302840635532, "loss": 0.0643, "theoretical_loss": 3.4264973771809104, "tokens_seen": 2243690496 }, { "epoch": 0.68, "learning_rate": 0.0003232225966939496, "loss": 0.0666, "theoretical_loss": 3.426465020973584, "tokens_seen": 2243952640 }, { "epoch": 0.68, "learning_rate": 0.0003231423527523672, "loss": 0.0676, "theoretical_loss": 3.4264326696042073, "tokens_seen": 2244214784 }, { "epoch": 0.68, "learning_rate": 0.0003230621088107848, "loss": 0.069, "theoretical_loss": 3.4264003230714923, "tokens_seen": 2244476928 }, { "epoch": 0.68, "learning_rate": 0.00032298186486920237, "loss": 0.0651, "theoretical_loss": 3.4263679813741503, "tokens_seen": 2244739072 }, { "epoch": 0.68, "learning_rate": 0.00032290162092762, "loss": 0.068, "theoretical_loss": 3.4263356445108943, "tokens_seen": 2245001216 }, { "epoch": 0.68, "learning_rate": 0.0003228213769860376, "loss": 0.0648, "theoretical_loss": 3.426303312480438, "tokens_seen": 2245263360 }, { "epoch": 0.68, "learning_rate": 0.00032274113304445515, "loss": 0.0688, "theoretical_loss": 3.426270985281494, "tokens_seen": 2245525504 }, { "epoch": 0.68, "learning_rate": 0.0003226608891028727, "loss": 0.0667, "theoretical_loss": 3.426238662912777, "tokens_seen": 2245787648 }, { "epoch": 0.68, "learning_rate": 0.0003225806451612903, "loss": 0.0678, "theoretical_loss": 3.4262063453730014, "tokens_seen": 2246049792 }, { "epoch": 0.68, "learning_rate": 0.00032250040121970793, "loss": 0.0678, "theoretical_loss": 3.4261740326608825, "tokens_seen": 2246311936 }, { "epoch": 0.68, "learning_rate": 0.0003224201572781255, "loss": 0.0646, "theoretical_loss": 3.4261417247751353, "tokens_seen": 2246574080 }, { "epoch": 0.68, "learning_rate": 0.0003223399133365431, "loss": 0.0663, "theoretical_loss": 3.4261094217144765, "tokens_seen": 2246836224 }, { "epoch": 0.68, "learning_rate": 0.0003222596693949607, "loss": 0.0681, "theoretical_loss": 3.4260771234776226, "tokens_seen": 2247098368 }, { "epoch": 0.68, "learning_rate": 0.0003221794254533782, "loss": 0.0678, "theoretical_loss": 3.4260448300632906, "tokens_seen": 2247360512 }, { "epoch": 0.68, "learning_rate": 0.00032209918151179584, "loss": 0.0679, "theoretical_loss": 3.4260125414701976, "tokens_seen": 2247622656 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.0004595842328853905, "objective/train/docs_used": 818241, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.241592288017273, "objective/train/original_loss": 1.241592288017273, "objective/train/theoretical_loss": 3.4259802576970624, "objective/train/tokens_used": 2268344800, "objective/train/value_avg": -0.004749298095703125, "objective/train/value_loss": 0.00011237629951210693, "objective/train/value_max": -1.3947486877441406e-05, "objective/train/value_min": -0.71484375, "objective/train/value_reward_corr": 0.6671147463286998, "objective/train/value_std": 0.00948333740234375, "objective/train/weight_avg": 1.0005121231079102, "objective/train/weighted_lm_loss": 1.2421815395355225, "objective/train/weights_max": 1.2712233066558838, "objective/train/weights_min": 0.37275585532188416, "theoretical_loss": 3.4259802576970624, "tokens_seen": 2247884800 }, { "epoch": 0.68, "learning_rate": 0.00032201893757021345, "loss": 0.0648, "theoretical_loss": 3.4259802576970624, "tokens_seen": 2247884800 }, { "epoch": 0.68, "learning_rate": 0.00032193869362863106, "loss": 0.0671, "theoretical_loss": 3.425947978742603, "tokens_seen": 2248146944 }, { "epoch": 0.68, "learning_rate": 0.0003218584496870486, "loss": 0.0666, "theoretical_loss": 3.425915704605538, "tokens_seen": 2248409088 }, { "epoch": 0.68, "learning_rate": 0.00032177820574546623, "loss": 0.0664, "theoretical_loss": 3.4258834352845877, "tokens_seen": 2248671232 }, { "epoch": 0.68, "learning_rate": 0.00032169796180388384, "loss": 0.0666, "theoretical_loss": 3.425851170778472, "tokens_seen": 2248933376 }, { "epoch": 0.68, "learning_rate": 0.0003216177178623014, "loss": 0.0668, "theoretical_loss": 3.4258189110859107, "tokens_seen": 2249195520 }, { "epoch": 0.68, "learning_rate": 0.00032153747392071896, "loss": 0.0657, "theoretical_loss": 3.425786656205626, "tokens_seen": 2249457664 }, { "epoch": 0.68, "learning_rate": 0.0003214572299791366, "loss": 0.0671, "theoretical_loss": 3.425754406136338, "tokens_seen": 2249719808 }, { "epoch": 0.68, "learning_rate": 0.0003213769860375542, "loss": 0.0649, "theoretical_loss": 3.425722160876769, "tokens_seen": 2249981952 }, { "epoch": 0.68, "learning_rate": 0.00032129674209597174, "loss": 0.069, "theoretical_loss": 3.425689920425642, "tokens_seen": 2250244096 }, { "epoch": 0.68, "learning_rate": 0.00032121649815438936, "loss": 0.0667, "theoretical_loss": 3.42565768478168, "tokens_seen": 2250506240 }, { "epoch": 0.68, "learning_rate": 0.00032113625421280697, "loss": 0.0678, "theoretical_loss": 3.4256254539436055, "tokens_seen": 2250768384 }, { "epoch": 0.68, "learning_rate": 0.00032105601027122453, "loss": 0.0669, "theoretical_loss": 3.4255932279101433, "tokens_seen": 2251030528 }, { "epoch": 0.68, "learning_rate": 0.0003209757663296421, "loss": 0.0679, "theoretical_loss": 3.4255610066800166, "tokens_seen": 2251292672 }, { "epoch": 0.68, "learning_rate": 0.0003208955223880597, "loss": 0.0672, "theoretical_loss": 3.425528790251952, "tokens_seen": 2251554816 }, { "epoch": 0.68, "learning_rate": 0.0003208152784464773, "loss": 0.0673, "theoretical_loss": 3.4254965786246734, "tokens_seen": 2251816960 }, { "epoch": 0.68, "learning_rate": 0.00032073503450489487, "loss": 0.0677, "theoretical_loss": 3.4254643717969073, "tokens_seen": 2252079104 }, { "epoch": 0.68, "learning_rate": 0.0003206547905633125, "loss": 0.066, "theoretical_loss": 3.42543216976738, "tokens_seen": 2252341248 }, { "epoch": 0.68, "learning_rate": 0.0003205745466217301, "loss": 0.0645, "theoretical_loss": 3.425399972534818, "tokens_seen": 2252603392 }, { "epoch": 0.68, "learning_rate": 0.00032049430268014765, "loss": 0.0643, "theoretical_loss": 3.4253677800979494, "tokens_seen": 2252865536 }, { "epoch": 0.68, "learning_rate": 0.0003204140587385652, "loss": 0.0677, "theoretical_loss": 3.425335592455501, "tokens_seen": 2253127680 }, { "epoch": 0.68, "learning_rate": 0.0003203338147969828, "loss": 0.0668, "theoretical_loss": 3.4253034096062014, "tokens_seen": 2253389824 }, { "epoch": 0.68, "learning_rate": 0.00032025357085540044, "loss": 0.0706, "theoretical_loss": 3.4252712315487797, "tokens_seen": 2253651968 }, { "epoch": 0.68, "learning_rate": 0.000320173326913818, "loss": 0.0691, "theoretical_loss": 3.4252390582819645, "tokens_seen": 2253914112 }, { "epoch": 0.68, "learning_rate": 0.0003200930829722356, "loss": 0.0668, "theoretical_loss": 3.4252068898044863, "tokens_seen": 2254176256 }, { "epoch": 0.68, "objective/train/advantage_avg": -4.281154542695731e-05, "objective/train/docs_used": 820753, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3093576431274414, "objective/train/original_loss": 1.309357762336731, "objective/train/theoretical_loss": 3.425174726115075, "objective/train/tokens_used": 2274898400, "objective/train/value_avg": -0.0074005126953125, "objective/train/value_loss": 0.00020979443797841668, "objective/train/value_max": -3.045797348022461e-05, "objective/train/value_min": -0.39990234375, "objective/train/value_reward_corr": 0.6900546208748884, "objective/train/value_std": 0.013519287109375, "objective/train/weight_avg": 1.000051498413086, "objective/train/weighted_lm_loss": 1.309179425239563, "objective/train/weights_max": 1.2513753175735474, "objective/train/weights_min": 0.36851292848587036, "theoretical_loss": 3.425174726115075, "tokens_seen": 2254438400 }, { "epoch": 0.68, "learning_rate": 0.0003200128390306532, "loss": 0.0646, "theoretical_loss": 3.425174726115075, "tokens_seen": 2254438400 }, { "epoch": 0.68, "learning_rate": 0.0003199325950890708, "loss": 0.0659, "theoretical_loss": 3.425142567212461, "tokens_seen": 2254700544 }, { "epoch": 0.68, "learning_rate": 0.0003198523511474884, "loss": 0.0655, "theoretical_loss": 3.4251104130953762, "tokens_seen": 2254962688 }, { "epoch": 0.68, "learning_rate": 0.00031977210720590595, "loss": 0.0692, "theoretical_loss": 3.4250782637625514, "tokens_seen": 2255224832 }, { "epoch": 0.68, "learning_rate": 0.0003196918632643235, "loss": 0.0674, "theoretical_loss": 3.4250461192127193, "tokens_seen": 2255486976 }, { "epoch": 0.68, "learning_rate": 0.0003196116193227411, "loss": 0.0669, "theoretical_loss": 3.4250139794446124, "tokens_seen": 2255749120 }, { "epoch": 0.68, "learning_rate": 0.00031953137538115873, "loss": 0.0636, "theoretical_loss": 3.4249818444569637, "tokens_seen": 2256011264 }, { "epoch": 0.68, "learning_rate": 0.00031945113143957635, "loss": 0.0649, "theoretical_loss": 3.424949714248507, "tokens_seen": 2256273408 }, { "epoch": 0.68, "learning_rate": 0.0003193708874979939, "loss": 0.0664, "theoretical_loss": 3.4249175888179764, "tokens_seen": 2256535552 }, { "epoch": 0.68, "learning_rate": 0.0003192906435564115, "loss": 0.0646, "theoretical_loss": 3.424885468164106, "tokens_seen": 2256797696 }, { "epoch": 0.68, "learning_rate": 0.0003192103996148291, "loss": 0.0675, "theoretical_loss": 3.4248533522856315, "tokens_seen": 2257059840 }, { "epoch": 0.68, "learning_rate": 0.00031913015567324663, "loss": 0.0703, "theoretical_loss": 3.424821241181288, "tokens_seen": 2257321984 }, { "epoch": 0.68, "learning_rate": 0.00031904991173166425, "loss": 0.0676, "theoretical_loss": 3.4247891348498114, "tokens_seen": 2257584128 }, { "epoch": 0.68, "learning_rate": 0.00031896966779008186, "loss": 0.0651, "theoretical_loss": 3.424757033289939, "tokens_seen": 2257846272 }, { "epoch": 0.68, "learning_rate": 0.00031888942384849947, "loss": 0.0677, "theoretical_loss": 3.424724936500407, "tokens_seen": 2258108416 }, { "epoch": 0.68, "learning_rate": 0.00031880917990691703, "loss": 0.0658, "theoretical_loss": 3.424692844479953, "tokens_seen": 2258370560 }, { "epoch": 0.68, "learning_rate": 0.00031872893596533464, "loss": 0.0689, "theoretical_loss": 3.424660757227315, "tokens_seen": 2258632704 }, { "epoch": 0.68, "learning_rate": 0.00031864869202375226, "loss": 0.0696, "theoretical_loss": 3.4246286747412316, "tokens_seen": 2258894848 }, { "epoch": 0.68, "learning_rate": 0.00031856844808216976, "loss": 0.0691, "theoretical_loss": 3.4245965970204413, "tokens_seen": 2259156992 }, { "epoch": 0.68, "learning_rate": 0.0003184882041405874, "loss": 0.0661, "theoretical_loss": 3.4245645240636833, "tokens_seen": 2259419136 }, { "epoch": 0.68, "learning_rate": 0.000318407960199005, "loss": 0.0695, "theoretical_loss": 3.4245324558696986, "tokens_seen": 2259681280 }, { "epoch": 0.68, "learning_rate": 0.0003183277162574226, "loss": 0.0673, "theoretical_loss": 3.4245003924372264, "tokens_seen": 2259943424 }, { "epoch": 0.68, "learning_rate": 0.00031824747231584016, "loss": 0.0637, "theoretical_loss": 3.424468333765008, "tokens_seen": 2260205568 }, { "epoch": 0.69, "learning_rate": 0.00031816722837425777, "loss": 0.0672, "theoretical_loss": 3.4244362798517844, "tokens_seen": 2260467712 }, { "epoch": 0.69, "learning_rate": 0.0003180869844326754, "loss": 0.065, "theoretical_loss": 3.4244042306962976, "tokens_seen": 2260729856 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.0001269732601940632, "objective/train/docs_used": 823330, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3321760892868042, "objective/train/original_loss": 1.3321759700775146, "objective/train/theoretical_loss": 3.42437218629729, "objective/train/tokens_used": 2281452000, "objective/train/value_avg": -0.01062774658203125, "objective/train/value_loss": 0.00037752685602754354, "objective/train/value_max": -3.451108932495117e-05, "objective/train/value_min": -0.71044921875, "objective/train/value_reward_corr": 0.7460287555104631, "objective/train/value_std": 0.02197265625, "objective/train/weight_avg": 1.000304102897644, "objective/train/weighted_lm_loss": 1.3322685956954956, "objective/train/weights_max": 1.8926640748977661, "objective/train/weights_min": 0.38069406151771545, "theoretical_loss": 3.42437218629729, "tokens_seen": 2260992000 }, { "epoch": 0.69, "learning_rate": 0.0003180067404910929, "loss": 0.0677, "theoretical_loss": 3.42437218629729, "tokens_seen": 2260992000 }, { "epoch": 0.69, "learning_rate": 0.0003179264965495105, "loss": 0.0685, "theoretical_loss": 3.4243401466535044, "tokens_seen": 2261254144 }, { "epoch": 0.69, "learning_rate": 0.0003178462526079281, "loss": 0.0697, "theoretical_loss": 3.424308111763683, "tokens_seen": 2261516288 }, { "epoch": 0.69, "learning_rate": 0.00031776600866634567, "loss": 0.0666, "theoretical_loss": 3.4242760816265707, "tokens_seen": 2261778432 }, { "epoch": 0.69, "learning_rate": 0.0003176857647247633, "loss": 0.0699, "theoretical_loss": 3.4242440562409113, "tokens_seen": 2262040576 }, { "epoch": 0.69, "learning_rate": 0.0003176055207831809, "loss": 0.0685, "theoretical_loss": 3.424212035605449, "tokens_seen": 2262302720 }, { "epoch": 0.69, "learning_rate": 0.0003175252768415985, "loss": 0.0694, "theoretical_loss": 3.4241800197189294, "tokens_seen": 2262564864 }, { "epoch": 0.69, "learning_rate": 0.000317445032900016, "loss": 0.0672, "theoretical_loss": 3.4241480085800977, "tokens_seen": 2262827008 }, { "epoch": 0.69, "learning_rate": 0.0003173647889584336, "loss": 0.0685, "theoretical_loss": 3.4241160021877004, "tokens_seen": 2263089152 }, { "epoch": 0.69, "learning_rate": 0.00031728454501685124, "loss": 0.0695, "theoretical_loss": 3.424084000540484, "tokens_seen": 2263351296 }, { "epoch": 0.69, "learning_rate": 0.0003172043010752688, "loss": 0.0662, "theoretical_loss": 3.424052003637195, "tokens_seen": 2263613440 }, { "epoch": 0.69, "learning_rate": 0.0003171240571336864, "loss": 0.0682, "theoretical_loss": 3.4240200114765815, "tokens_seen": 2263875584 }, { "epoch": 0.69, "learning_rate": 0.000317043813192104, "loss": 0.0653, "theoretical_loss": 3.423988024057391, "tokens_seen": 2264137728 }, { "epoch": 0.69, "learning_rate": 0.00031696356925052163, "loss": 0.0692, "theoretical_loss": 3.423956041378373, "tokens_seen": 2264399872 }, { "epoch": 0.69, "learning_rate": 0.0003168833253089392, "loss": 0.066, "theoretical_loss": 3.423924063438275, "tokens_seen": 2264662016 }, { "epoch": 0.69, "learning_rate": 0.00031680308136735675, "loss": 0.0656, "theoretical_loss": 3.4238920902358467, "tokens_seen": 2264924160 }, { "epoch": 0.69, "learning_rate": 0.00031672283742577436, "loss": 0.0665, "theoretical_loss": 3.423860121769839, "tokens_seen": 2265186304 }, { "epoch": 0.69, "learning_rate": 0.0003166425934841919, "loss": 0.0661, "theoretical_loss": 3.423828158039001, "tokens_seen": 2265448448 }, { "epoch": 0.69, "learning_rate": 0.00031656234954260953, "loss": 0.0695, "theoretical_loss": 3.4237961990420844, "tokens_seen": 2265710592 }, { "epoch": 0.69, "learning_rate": 0.00031648210560102715, "loss": 0.0668, "theoretical_loss": 3.42376424477784, "tokens_seen": 2265972736 }, { "epoch": 0.69, "learning_rate": 0.00031640186165944476, "loss": 0.0675, "theoretical_loss": 3.4237322952450198, "tokens_seen": 2266234880 }, { "epoch": 0.69, "learning_rate": 0.0003163216177178623, "loss": 0.0677, "theoretical_loss": 3.423700350442376, "tokens_seen": 2266497024 }, { "epoch": 0.69, "learning_rate": 0.0003162413737762799, "loss": 0.0659, "theoretical_loss": 3.4236684103686614, "tokens_seen": 2266759168 }, { "epoch": 0.69, "learning_rate": 0.0003161611298346975, "loss": 0.068, "theoretical_loss": 3.423636475022629, "tokens_seen": 2267021312 }, { "epoch": 0.69, "learning_rate": 0.00031608088589311505, "loss": 0.067, "theoretical_loss": 3.423604544403032, "tokens_seen": 2267283456 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.00016212603077292442, "objective/train/docs_used": 825734, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2646903991699219, "objective/train/original_loss": 1.2646903991699219, "objective/train/theoretical_loss": 3.4235726185086257, "objective/train/tokens_used": 2288005600, "objective/train/value_avg": -0.0071868896484375, "objective/train/value_loss": 0.0001659700064919889, "objective/train/value_max": -3.9458274841308594e-05, "objective/train/value_min": -0.3583984375, "objective/train/value_reward_corr": 0.6855385326256844, "objective/train/value_std": 0.01271820068359375, "objective/train/weight_avg": 1.0002398490905762, "objective/train/weighted_lm_loss": 1.2645337581634521, "objective/train/weights_max": 1.3612432479858398, "objective/train/weights_min": 0.37317126989364624, "theoretical_loss": 3.4235726185086257, "tokens_seen": 2267545600 }, { "epoch": 0.69, "learning_rate": 0.00031600064195153266, "loss": 0.0663, "theoretical_loss": 3.4235726185086257, "tokens_seen": 2267545600 }, { "epoch": 0.69, "learning_rate": 0.00031592039800995027, "loss": 0.0663, "theoretical_loss": 3.423540697338164, "tokens_seen": 2267807744 }, { "epoch": 0.69, "learning_rate": 0.0003158401540683679, "loss": 0.0673, "theoretical_loss": 3.423508780890402, "tokens_seen": 2268069888 }, { "epoch": 0.69, "learning_rate": 0.00031575991012678544, "loss": 0.0678, "theoretical_loss": 3.4234768691640953, "tokens_seen": 2268332032 }, { "epoch": 0.69, "learning_rate": 0.000315679666185203, "loss": 0.0661, "theoretical_loss": 3.4234449621580003, "tokens_seen": 2268594176 }, { "epoch": 0.69, "learning_rate": 0.0003155994222436206, "loss": 0.0669, "theoretical_loss": 3.4234130598708727, "tokens_seen": 2268856320 }, { "epoch": 0.69, "learning_rate": 0.00031551917830203817, "loss": 0.0667, "theoretical_loss": 3.42338116230147, "tokens_seen": 2269118464 }, { "epoch": 0.69, "learning_rate": 0.0003154389343604558, "loss": 0.0662, "theoretical_loss": 3.4233492694485497, "tokens_seen": 2269380608 }, { "epoch": 0.69, "learning_rate": 0.0003153586904188734, "loss": 0.0641, "theoretical_loss": 3.4233173813108695, "tokens_seen": 2269642752 }, { "epoch": 0.69, "learning_rate": 0.00031527844647729096, "loss": 0.0662, "theoretical_loss": 3.423285497887188, "tokens_seen": 2269904896 }, { "epoch": 0.69, "learning_rate": 0.00031519820253570857, "loss": 0.0672, "theoretical_loss": 3.4232536191762635, "tokens_seen": 2270167040 }, { "epoch": 0.69, "learning_rate": 0.0003151179585941262, "loss": 0.0683, "theoretical_loss": 3.423221745176856, "tokens_seen": 2270429184 }, { "epoch": 0.69, "learning_rate": 0.00031503771465254374, "loss": 0.0648, "theoretical_loss": 3.423189875887725, "tokens_seen": 2270691328 }, { "epoch": 0.69, "learning_rate": 0.0003149574707109613, "loss": 0.0648, "theoretical_loss": 3.4231580113076303, "tokens_seen": 2270953472 }, { "epoch": 0.69, "learning_rate": 0.0003148772267693789, "loss": 0.0669, "theoretical_loss": 3.4231261514353335, "tokens_seen": 2271215616 }, { "epoch": 0.69, "learning_rate": 0.0003147969828277965, "loss": 0.0649, "theoretical_loss": 3.4230942962695954, "tokens_seen": 2271477760 }, { "epoch": 0.69, "learning_rate": 0.0003147167388862141, "loss": 0.0707, "theoretical_loss": 3.4230624458091774, "tokens_seen": 2271739904 }, { "epoch": 0.69, "learning_rate": 0.0003146364949446317, "loss": 0.0684, "theoretical_loss": 3.423030600052842, "tokens_seen": 2272002048 }, { "epoch": 0.69, "learning_rate": 0.0003145562510030493, "loss": 0.0646, "theoretical_loss": 3.422998758999352, "tokens_seen": 2272264192 }, { "epoch": 0.69, "learning_rate": 0.00031447600706146687, "loss": 0.0629, "theoretical_loss": 3.4229669226474697, "tokens_seen": 2272526336 }, { "epoch": 0.69, "learning_rate": 0.0003143957631198844, "loss": 0.0669, "theoretical_loss": 3.422935090995959, "tokens_seen": 2272788480 }, { "epoch": 0.69, "learning_rate": 0.00031431551917830204, "loss": 0.0675, "theoretical_loss": 3.4229032640435846, "tokens_seen": 2273050624 }, { "epoch": 0.69, "learning_rate": 0.00031423527523671965, "loss": 0.0655, "theoretical_loss": 3.42287144178911, "tokens_seen": 2273312768 }, { "epoch": 0.69, "learning_rate": 0.0003141550312951372, "loss": 0.0715, "theoretical_loss": 3.4228396242313006, "tokens_seen": 2273574912 }, { "epoch": 0.69, "learning_rate": 0.0003140747873535548, "loss": 0.0647, "theoretical_loss": 3.422807811368922, "tokens_seen": 2273837056 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.001269850879907608, "objective/train/docs_used": 828122, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3244622945785522, "objective/train/original_loss": 1.3244624137878418, "objective/train/theoretical_loss": 3.422776003200739, "objective/train/tokens_used": 2294559200, "objective/train/value_avg": -0.007564544677734375, "objective/train/value_loss": 0.0001630596088944003, "objective/train/value_max": -2.0623207092285156e-05, "objective/train/value_min": -0.60986328125, "objective/train/value_reward_corr": 0.7459873437025508, "objective/train/value_std": 0.01558685302734375, "objective/train/weight_avg": 1.0013480186462402, "objective/train/weighted_lm_loss": 1.326235294342041, "objective/train/weights_max": 1.236796259880066, "objective/train/weights_min": 0.39109206199645996, "theoretical_loss": 3.422776003200739, "tokens_seen": 2274099200 }, { "epoch": 0.69, "learning_rate": 0.00031399454341197243, "loss": 0.0658, "theoretical_loss": 3.422776003200739, "tokens_seen": 2274099200 }, { "epoch": 0.69, "learning_rate": 0.00031391429947039004, "loss": 0.0709, "theoretical_loss": 3.4227441997255195, "tokens_seen": 2274361344 }, { "epoch": 0.69, "learning_rate": 0.00031383405552880755, "loss": 0.0695, "theoretical_loss": 3.4227124009420296, "tokens_seen": 2274623488 }, { "epoch": 0.69, "learning_rate": 0.00031375381158722516, "loss": 0.0655, "theoretical_loss": 3.422680606849036, "tokens_seen": 2274885632 }, { "epoch": 0.69, "learning_rate": 0.0003136735676456428, "loss": 0.0658, "theoretical_loss": 3.4226488174453076, "tokens_seen": 2275147776 }, { "epoch": 0.69, "learning_rate": 0.00031359332370406033, "loss": 0.0678, "theoretical_loss": 3.4226170327296113, "tokens_seen": 2275409920 }, { "epoch": 0.69, "learning_rate": 0.00031351307976247795, "loss": 0.07, "theoretical_loss": 3.422585252700717, "tokens_seen": 2275672064 }, { "epoch": 0.69, "learning_rate": 0.00031343283582089556, "loss": 0.0678, "theoretical_loss": 3.422553477357393, "tokens_seen": 2275934208 }, { "epoch": 0.69, "learning_rate": 0.0003133525918793131, "loss": 0.0669, "theoretical_loss": 3.4225217066984093, "tokens_seen": 2276196352 }, { "epoch": 0.69, "learning_rate": 0.0003132723479377307, "loss": 0.0645, "theoretical_loss": 3.422489940722536, "tokens_seen": 2276458496 }, { "epoch": 0.69, "learning_rate": 0.0003131921039961483, "loss": 0.0664, "theoretical_loss": 3.422458179428543, "tokens_seen": 2276720640 }, { "epoch": 0.69, "learning_rate": 0.0003131118600545659, "loss": 0.0651, "theoretical_loss": 3.422426422815202, "tokens_seen": 2276982784 }, { "epoch": 0.69, "learning_rate": 0.00031303161611298346, "loss": 0.0669, "theoretical_loss": 3.4223946708812845, "tokens_seen": 2277244928 }, { "epoch": 0.69, "learning_rate": 0.00031295137217140107, "loss": 0.0681, "theoretical_loss": 3.422362923625562, "tokens_seen": 2277507072 }, { "epoch": 0.69, "learning_rate": 0.0003128711282298187, "loss": 0.067, "theoretical_loss": 3.422331181046807, "tokens_seen": 2277769216 }, { "epoch": 0.69, "learning_rate": 0.00031279088428823624, "loss": 0.0666, "theoretical_loss": 3.4222994431437925, "tokens_seen": 2278031360 }, { "epoch": 0.69, "learning_rate": 0.0003127106403466538, "loss": 0.0658, "theoretical_loss": 3.4222677099152916, "tokens_seen": 2278293504 }, { "epoch": 0.69, "learning_rate": 0.0003126303964050714, "loss": 0.0666, "theoretical_loss": 3.4222359813600782, "tokens_seen": 2278555648 }, { "epoch": 0.69, "learning_rate": 0.000312550152463489, "loss": 0.0646, "theoretical_loss": 3.422204257476927, "tokens_seen": 2278817792 }, { "epoch": 0.69, "learning_rate": 0.0003124699085219066, "loss": 0.0645, "theoretical_loss": 3.4221725382646118, "tokens_seen": 2279079936 }, { "epoch": 0.69, "learning_rate": 0.0003123896645803242, "loss": 0.0663, "theoretical_loss": 3.422140823721908, "tokens_seen": 2279342080 }, { "epoch": 0.69, "learning_rate": 0.0003123094206387418, "loss": 0.0684, "theoretical_loss": 3.422109113847592, "tokens_seen": 2279604224 }, { "epoch": 0.69, "learning_rate": 0.00031222917669715937, "loss": 0.0644, "theoretical_loss": 3.422077408640439, "tokens_seen": 2279866368 }, { "epoch": 0.69, "learning_rate": 0.000312148932755577, "loss": 0.0671, "theoretical_loss": 3.422045708099226, "tokens_seen": 2280128512 }, { "epoch": 0.69, "learning_rate": 0.00031206868881399454, "loss": 0.0687, "theoretical_loss": 3.42201401222273, "tokens_seen": 2280390656 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.0003086750512011349, "objective/train/docs_used": 830439, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2479599714279175, "objective/train/original_loss": 1.2479599714279175, "objective/train/theoretical_loss": 3.421982321009728, "objective/train/tokens_used": 2301112800, "objective/train/value_avg": -0.00862884521484375, "objective/train/value_loss": 0.0004476759640965611, "objective/train/value_max": -4.5418739318847656e-05, "objective/train/value_min": -0.66748046875, "objective/train/value_reward_corr": 0.6656916804020896, "objective/train/value_std": 0.0174102783203125, "objective/train/weight_avg": 1.0004971027374268, "objective/train/weighted_lm_loss": 1.2481622695922852, "objective/train/weights_max": 1.3128468990325928, "objective/train/weights_min": 0.22669236361980438, "theoretical_loss": 3.421982321009728, "tokens_seen": 2280652800 }, { "epoch": 0.69, "learning_rate": 0.00031198844487241215, "loss": 0.0665, "theoretical_loss": 3.421982321009728, "tokens_seen": 2280652800 }, { "epoch": 0.69, "learning_rate": 0.0003119082009308297, "loss": 0.0675, "theoretical_loss": 3.4219506344589985, "tokens_seen": 2280914944 }, { "epoch": 0.69, "learning_rate": 0.0003118279569892473, "loss": 0.0659, "theoretical_loss": 3.421918952569319, "tokens_seen": 2281177088 }, { "epoch": 0.69, "learning_rate": 0.00031174771304766494, "loss": 0.0643, "theoretical_loss": 3.4218872753394702, "tokens_seen": 2281439232 }, { "epoch": 0.69, "learning_rate": 0.0003116674691060825, "loss": 0.0663, "theoretical_loss": 3.4218556027682294, "tokens_seen": 2281701376 }, { "epoch": 0.69, "learning_rate": 0.0003115872251645001, "loss": 0.0645, "theoretical_loss": 3.4218239348543777, "tokens_seen": 2281963520 }, { "epoch": 0.69, "learning_rate": 0.00031150698122291766, "loss": 0.0677, "theoretical_loss": 3.4217922715966944, "tokens_seen": 2282225664 }, { "epoch": 0.69, "learning_rate": 0.0003114267372813352, "loss": 0.0662, "theoretical_loss": 3.421760612993961, "tokens_seen": 2282487808 }, { "epoch": 0.69, "learning_rate": 0.00031134649333975284, "loss": 0.0663, "theoretical_loss": 3.421728959044958, "tokens_seen": 2282749952 }, { "epoch": 0.69, "learning_rate": 0.00031126624939817045, "loss": 0.066, "theoretical_loss": 3.4216973097484678, "tokens_seen": 2283012096 }, { "epoch": 0.69, "learning_rate": 0.00031118600545658806, "loss": 0.0667, "theoretical_loss": 3.4216656651032715, "tokens_seen": 2283274240 }, { "epoch": 0.69, "learning_rate": 0.0003111057615150056, "loss": 0.0654, "theoretical_loss": 3.421634025108152, "tokens_seen": 2283536384 }, { "epoch": 0.69, "learning_rate": 0.00031102551757342323, "loss": 0.0659, "theoretical_loss": 3.421602389761893, "tokens_seen": 2283798528 }, { "epoch": 0.69, "learning_rate": 0.0003109452736318408, "loss": 0.0664, "theoretical_loss": 3.421570759063277, "tokens_seen": 2284060672 }, { "epoch": 0.69, "learning_rate": 0.00031086502969025835, "loss": 0.0668, "theoretical_loss": 3.4215391330110885, "tokens_seen": 2284322816 }, { "epoch": 0.69, "learning_rate": 0.00031078478574867596, "loss": 0.0677, "theoretical_loss": 3.421507511604111, "tokens_seen": 2284584960 }, { "epoch": 0.69, "learning_rate": 0.0003107045418070936, "loss": 0.065, "theoretical_loss": 3.42147589484113, "tokens_seen": 2284847104 }, { "epoch": 0.69, "learning_rate": 0.0003106242978655112, "loss": 0.0681, "theoretical_loss": 3.4214442827209313, "tokens_seen": 2285109248 }, { "epoch": 0.69, "learning_rate": 0.00031054405392392874, "loss": 0.0632, "theoretical_loss": 3.4214126752423, "tokens_seen": 2285371392 }, { "epoch": 0.69, "learning_rate": 0.00031046380998234636, "loss": 0.0653, "theoretical_loss": 3.4213810724040217, "tokens_seen": 2285633536 }, { "epoch": 0.69, "learning_rate": 0.00031038356604076397, "loss": 0.065, "theoretical_loss": 3.421349474204884, "tokens_seen": 2285895680 }, { "epoch": 0.69, "learning_rate": 0.0003103033220991815, "loss": 0.0661, "theoretical_loss": 3.4213178806436737, "tokens_seen": 2286157824 }, { "epoch": 0.69, "learning_rate": 0.0003102230781575991, "loss": 0.0637, "theoretical_loss": 3.4212862917191784, "tokens_seen": 2286419968 }, { "epoch": 0.69, "learning_rate": 0.0003101428342160167, "loss": 0.0659, "theoretical_loss": 3.4212547074301862, "tokens_seen": 2286682112 }, { "epoch": 0.69, "learning_rate": 0.0003100625902744343, "loss": 0.0645, "theoretical_loss": 3.4212231277754848, "tokens_seen": 2286944256 }, { "epoch": 0.69, "objective/train/advantage_avg": -0.0003998603788204491, "objective/train/docs_used": 832763, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.245110273361206, "objective/train/original_loss": 1.245110273361206, "objective/train/theoretical_loss": 3.4211915527538643, "objective/train/tokens_used": 2307666400, "objective/train/value_avg": -0.00817108154296875, "objective/train/value_loss": 0.00033646036172285676, "objective/train/value_max": -3.349781036376953e-05, "objective/train/value_min": -0.95166015625, "objective/train/value_reward_corr": 0.7729082380774197, "objective/train/value_std": 0.0215911865234375, "objective/train/weight_avg": 0.9997501373291016, "objective/train/weighted_lm_loss": 1.2448906898498535, "objective/train/weights_max": 1.6519445180892944, "objective/train/weights_min": 0.36829686164855957, "theoretical_loss": 3.4211915527538643, "tokens_seen": 2287206400 }, { "epoch": 0.69, "learning_rate": 0.00030998234633285187, "loss": 0.0652, "theoretical_loss": 3.4211915527538643, "tokens_seen": 2287206400 }, { "epoch": 0.69, "learning_rate": 0.0003099021023912695, "loss": 0.0649, "theoretical_loss": 3.4211599823641134, "tokens_seen": 2287468544 }, { "epoch": 0.69, "learning_rate": 0.0003098218584496871, "loss": 0.0668, "theoretical_loss": 3.421128416605022, "tokens_seen": 2287730688 }, { "epoch": 0.69, "learning_rate": 0.0003097416145081046, "loss": 0.0662, "theoretical_loss": 3.4210968554753807, "tokens_seen": 2287992832 }, { "epoch": 0.69, "learning_rate": 0.0003096613705665222, "loss": 0.0651, "theoretical_loss": 3.42106529897398, "tokens_seen": 2288254976 }, { "epoch": 0.69, "learning_rate": 0.0003095811266249398, "loss": 0.0669, "theoretical_loss": 3.4210337470996106, "tokens_seen": 2288517120 }, { "epoch": 0.69, "learning_rate": 0.0003095008826833574, "loss": 0.0664, "theoretical_loss": 3.4210021998510647, "tokens_seen": 2288779264 }, { "epoch": 0.69, "learning_rate": 0.000309420638741775, "loss": 0.0663, "theoretical_loss": 3.4209706572271346, "tokens_seen": 2289041408 }, { "epoch": 0.69, "learning_rate": 0.0003093403948001926, "loss": 0.0668, "theoretical_loss": 3.4209391192266128, "tokens_seen": 2289303552 }, { "epoch": 0.69, "learning_rate": 0.0003092601508586102, "loss": 0.0672, "theoretical_loss": 3.420907585848292, "tokens_seen": 2289565696 }, { "epoch": 0.69, "learning_rate": 0.0003091799069170278, "loss": 0.0669, "theoretical_loss": 3.4208760570909655, "tokens_seen": 2289827840 }, { "epoch": 0.69, "learning_rate": 0.00030909966297544534, "loss": 0.0649, "theoretical_loss": 3.420844532953428, "tokens_seen": 2290089984 }, { "epoch": 0.69, "learning_rate": 0.00030901941903386295, "loss": 0.0677, "theoretical_loss": 3.420813013434473, "tokens_seen": 2290352128 }, { "epoch": 0.69, "learning_rate": 0.0003089391750922805, "loss": 0.0655, "theoretical_loss": 3.4207814985328957, "tokens_seen": 2290614272 }, { "epoch": 0.69, "learning_rate": 0.0003088589311506981, "loss": 0.0655, "theoretical_loss": 3.4207499882474917, "tokens_seen": 2290876416 }, { "epoch": 0.69, "learning_rate": 0.00030877868720911573, "loss": 0.0632, "theoretical_loss": 3.4207184825770565, "tokens_seen": 2291138560 }, { "epoch": 0.69, "learning_rate": 0.00030869844326753335, "loss": 0.0643, "theoretical_loss": 3.420686981520386, "tokens_seen": 2291400704 }, { "epoch": 0.69, "learning_rate": 0.0003086181993259509, "loss": 0.0665, "theoretical_loss": 3.420655485076277, "tokens_seen": 2291662848 }, { "epoch": 0.69, "learning_rate": 0.00030853795538436846, "loss": 0.0652, "theoretical_loss": 3.420623993243527, "tokens_seen": 2291924992 }, { "epoch": 0.69, "learning_rate": 0.0003084577114427861, "loss": 0.0661, "theoretical_loss": 3.420592506020933, "tokens_seen": 2292187136 }, { "epoch": 0.69, "learning_rate": 0.00030837746750120363, "loss": 0.0646, "theoretical_loss": 3.420561023407293, "tokens_seen": 2292449280 }, { "epoch": 0.69, "learning_rate": 0.00030829722355962125, "loss": 0.0675, "theoretical_loss": 3.420529545401406, "tokens_seen": 2292711424 }, { "epoch": 0.69, "learning_rate": 0.00030821697961803886, "loss": 0.067, "theoretical_loss": 3.42049807200207, "tokens_seen": 2292973568 }, { "epoch": 0.69, "learning_rate": 0.00030813673567645647, "loss": 0.0659, "theoretical_loss": 3.420466603208085, "tokens_seen": 2293235712 }, { "epoch": 0.7, "learning_rate": 0.00030805649173487403, "loss": 0.0634, "theoretical_loss": 3.4204351390182506, "tokens_seen": 2293497856 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.001006623962894082, "objective/train/docs_used": 835219, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2900722026824951, "objective/train/original_loss": 1.2900723218917847, "objective/train/theoretical_loss": 3.4204036794313675, "objective/train/tokens_used": 2314220000, "objective/train/value_avg": -0.006023406982421875, "objective/train/value_loss": 0.00020214400137774646, "objective/train/value_max": -2.6702880859375e-05, "objective/train/value_min": -0.407470703125, "objective/train/value_reward_corr": 0.6690516829985103, "objective/train/value_std": 0.0127410888671875, "objective/train/weight_avg": 1.0010955333709717, "objective/train/weighted_lm_loss": 1.2918227910995483, "objective/train/weights_max": 1.4126380681991577, "objective/train/weights_min": 0.30655041337013245, "theoretical_loss": 3.4204036794313675, "tokens_seen": 2293760000 }, { "epoch": 0.7, "learning_rate": 0.0003079762477932916, "loss": 0.0674, "theoretical_loss": 3.4204036794313675, "tokens_seen": 2293760000 }, { "epoch": 0.7, "learning_rate": 0.0003078960038517092, "loss": 0.0672, "theoretical_loss": 3.4203722244462353, "tokens_seen": 2294022144 }, { "epoch": 0.7, "learning_rate": 0.00030781575991012676, "loss": 0.0693, "theoretical_loss": 3.4203407740616565, "tokens_seen": 2294284288 }, { "epoch": 0.7, "learning_rate": 0.0003077355159685444, "loss": 0.0697, "theoretical_loss": 3.4203093282764314, "tokens_seen": 2294546432 }, { "epoch": 0.7, "learning_rate": 0.000307655272026962, "loss": 0.0652, "theoretical_loss": 3.420277887089363, "tokens_seen": 2294808576 }, { "epoch": 0.7, "learning_rate": 0.0003075750280853796, "loss": 0.064, "theoretical_loss": 3.420246450499253, "tokens_seen": 2295070720 }, { "epoch": 0.7, "learning_rate": 0.00030749478414379716, "loss": 0.0642, "theoretical_loss": 3.420215018504905, "tokens_seen": 2295332864 }, { "epoch": 0.7, "learning_rate": 0.00030741454020221477, "loss": 0.0629, "theoretical_loss": 3.4201835911051224, "tokens_seen": 2295595008 }, { "epoch": 0.7, "learning_rate": 0.00030733429626063233, "loss": 0.0685, "theoretical_loss": 3.4201521682987086, "tokens_seen": 2295857152 }, { "epoch": 0.7, "learning_rate": 0.0003072540523190499, "loss": 0.0681, "theoretical_loss": 3.420120750084468, "tokens_seen": 2296119296 }, { "epoch": 0.7, "learning_rate": 0.0003071738083774675, "loss": 0.0672, "theoretical_loss": 3.4200893364612055, "tokens_seen": 2296381440 }, { "epoch": 0.7, "learning_rate": 0.0003070935644358851, "loss": 0.0688, "theoretical_loss": 3.4200579274277256, "tokens_seen": 2296643584 }, { "epoch": 0.7, "learning_rate": 0.00030701332049430267, "loss": 0.0669, "theoretical_loss": 3.420026522982835, "tokens_seen": 2296905728 }, { "epoch": 0.7, "learning_rate": 0.0003069330765527203, "loss": 0.0657, "theoretical_loss": 3.4199951231253394, "tokens_seen": 2297167872 }, { "epoch": 0.7, "learning_rate": 0.0003068528326111379, "loss": 0.0666, "theoretical_loss": 3.4199637278540447, "tokens_seen": 2297430016 }, { "epoch": 0.7, "learning_rate": 0.00030677258866955545, "loss": 0.0647, "theoretical_loss": 3.4199323371677584, "tokens_seen": 2297692160 }, { "epoch": 0.7, "learning_rate": 0.000306692344727973, "loss": 0.066, "theoretical_loss": 3.419900951065288, "tokens_seen": 2297954304 }, { "epoch": 0.7, "learning_rate": 0.0003066121007863906, "loss": 0.0661, "theoretical_loss": 3.419869569545441, "tokens_seen": 2298216448 }, { "epoch": 0.7, "learning_rate": 0.00030653185684480824, "loss": 0.0653, "theoretical_loss": 3.419838192607026, "tokens_seen": 2298478592 }, { "epoch": 0.7, "learning_rate": 0.0003064516129032258, "loss": 0.0629, "theoretical_loss": 3.4198068202488514, "tokens_seen": 2298740736 }, { "epoch": 0.7, "learning_rate": 0.0003063713689616434, "loss": 0.067, "theoretical_loss": 3.419775452469727, "tokens_seen": 2299002880 }, { "epoch": 0.7, "learning_rate": 0.000306291125020061, "loss": 0.0659, "theoretical_loss": 3.4197440892684616, "tokens_seen": 2299265024 }, { "epoch": 0.7, "learning_rate": 0.0003062108810784786, "loss": 0.0661, "theoretical_loss": 3.4197127306438664, "tokens_seen": 2299527168 }, { "epoch": 0.7, "learning_rate": 0.00030613063713689614, "loss": 0.0686, "theoretical_loss": 3.4196813765947507, "tokens_seen": 2299789312 }, { "epoch": 0.7, "learning_rate": 0.00030605039319531375, "loss": 0.0682, "theoretical_loss": 3.4196500271199266, "tokens_seen": 2300051456 }, { "epoch": 0.7, "objective/train/advantage_avg": 3.6831370380241424e-05, "objective/train/docs_used": 837515, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3956961631774902, "objective/train/original_loss": 1.3956961631774902, "objective/train/theoretical_loss": 3.4196186822182044, "objective/train/tokens_used": 2320773600, "objective/train/value_avg": -0.00783538818359375, "objective/train/value_loss": 0.0005023198900744319, "objective/train/value_max": -3.5643577575683594e-05, "objective/train/value_min": -0.91845703125, "objective/train/value_reward_corr": 0.6416035836441893, "objective/train/value_std": 0.0215606689453125, "objective/train/weight_avg": 1.0002703666687012, "objective/train/weighted_lm_loss": 1.3962715864181519, "objective/train/weights_max": 2.0975584983825684, "objective/train/weights_min": 0.368789941072464, "theoretical_loss": 3.4196186822182044, "tokens_seen": 2300313600 }, { "epoch": 0.7, "learning_rate": 0.00030597014925373136, "loss": 0.0679, "theoretical_loss": 3.4196186822182044, "tokens_seen": 2300313600 }, { "epoch": 0.7, "learning_rate": 0.0003058899053121489, "loss": 0.0652, "theoretical_loss": 3.419587341888397, "tokens_seen": 2300575744 }, { "epoch": 0.7, "learning_rate": 0.00030580966137056653, "loss": 0.0644, "theoretical_loss": 3.4195560061293158, "tokens_seen": 2300837888 }, { "epoch": 0.7, "learning_rate": 0.00030572941742898415, "loss": 0.0675, "theoretical_loss": 3.4195246749397743, "tokens_seen": 2301100032 }, { "epoch": 0.7, "learning_rate": 0.00030564917348740176, "loss": 0.0641, "theoretical_loss": 3.4194933483185856, "tokens_seen": 2301362176 }, { "epoch": 0.7, "learning_rate": 0.00030556892954581926, "loss": 0.0661, "theoretical_loss": 3.4194620262645627, "tokens_seen": 2301624320 }, { "epoch": 0.7, "learning_rate": 0.0003054886856042369, "loss": 0.0686, "theoretical_loss": 3.4194307087765203, "tokens_seen": 2301886464 }, { "epoch": 0.7, "learning_rate": 0.0003054084416626545, "loss": 0.0698, "theoretical_loss": 3.4193993958532722, "tokens_seen": 2302148608 }, { "epoch": 0.7, "learning_rate": 0.00030532819772107205, "loss": 0.0649, "theoretical_loss": 3.4193680874936345, "tokens_seen": 2302410752 }, { "epoch": 0.7, "learning_rate": 0.00030524795377948966, "loss": 0.0671, "theoretical_loss": 3.419336783696422, "tokens_seen": 2302672896 }, { "epoch": 0.7, "learning_rate": 0.00030516770983790727, "loss": 0.0666, "theoretical_loss": 3.41930548446045, "tokens_seen": 2302935040 }, { "epoch": 0.7, "learning_rate": 0.00030508746589632483, "loss": 0.0689, "theoretical_loss": 3.419274189784536, "tokens_seen": 2303197184 }, { "epoch": 0.7, "learning_rate": 0.0003050072219547424, "loss": 0.0677, "theoretical_loss": 3.4192428996674957, "tokens_seen": 2303459328 }, { "epoch": 0.7, "learning_rate": 0.00030492697801316, "loss": 0.0696, "theoretical_loss": 3.4192116141081463, "tokens_seen": 2303721472 }, { "epoch": 0.7, "learning_rate": 0.0003048467340715776, "loss": 0.0677, "theoretical_loss": 3.4191803331053063, "tokens_seen": 2303983616 }, { "epoch": 0.7, "learning_rate": 0.00030476649012999517, "loss": 0.0673, "theoretical_loss": 3.419149056657793, "tokens_seen": 2304245760 }, { "epoch": 0.7, "learning_rate": 0.0003046862461884128, "loss": 0.0647, "theoretical_loss": 3.4191177847644254, "tokens_seen": 2304507904 }, { "epoch": 0.7, "learning_rate": 0.0003046060022468304, "loss": 0.0675, "theoretical_loss": 3.419086517424022, "tokens_seen": 2304770048 }, { "epoch": 0.7, "learning_rate": 0.00030452575830524796, "loss": 0.0666, "theoretical_loss": 3.4190552546354023, "tokens_seen": 2305032192 }, { "epoch": 0.7, "learning_rate": 0.00030444551436366557, "loss": 0.0673, "theoretical_loss": 3.419023996397386, "tokens_seen": 2305294336 }, { "epoch": 0.7, "learning_rate": 0.0003043652704220831, "loss": 0.065, "theoretical_loss": 3.418992742708794, "tokens_seen": 2305556480 }, { "epoch": 0.7, "learning_rate": 0.00030428502648050074, "loss": 0.0654, "theoretical_loss": 3.418961493568446, "tokens_seen": 2305818624 }, { "epoch": 0.7, "learning_rate": 0.0003042047825389183, "loss": 0.0671, "theoretical_loss": 3.4189302489751636, "tokens_seen": 2306080768 }, { "epoch": 0.7, "learning_rate": 0.0003041245385973359, "loss": 0.0664, "theoretical_loss": 3.418899008927769, "tokens_seen": 2306342912 }, { "epoch": 0.7, "learning_rate": 0.0003040442946557535, "loss": 0.0658, "theoretical_loss": 3.4188677734250836, "tokens_seen": 2306605056 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.001095419516786933, "objective/train/docs_used": 839893, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.38678777217865, "objective/train/original_loss": 1.3867876529693604, "objective/train/theoretical_loss": 3.4188365424659293, "objective/train/tokens_used": 2327327200, "objective/train/value_avg": -0.0091094970703125, "objective/train/value_loss": 0.0002580058353487402, "objective/train/value_max": -4.3332576751708984e-05, "objective/train/value_min": -0.86572265625, "objective/train/value_reward_corr": 0.7183997951826784, "objective/train/value_std": 0.0175933837890625, "objective/train/weight_avg": 1.0012105703353882, "objective/train/weighted_lm_loss": 1.3880761861801147, "objective/train/weights_max": 1.3674609661102295, "objective/train/weights_min": 0.3692784309387207, "theoretical_loss": 3.4188365424659293, "tokens_seen": 2306867200 }, { "epoch": 0.7, "learning_rate": 0.0003039640507141711, "loss": 0.0671, "theoretical_loss": 3.4188365424659293, "tokens_seen": 2306867200 }, { "epoch": 0.7, "learning_rate": 0.0003038838067725887, "loss": 0.066, "theoretical_loss": 3.4188053160491303, "tokens_seen": 2307129344 }, { "epoch": 0.7, "learning_rate": 0.00030380356283100625, "loss": 0.0663, "theoretical_loss": 3.418774094173509, "tokens_seen": 2307391488 }, { "epoch": 0.7, "learning_rate": 0.00030372331888942387, "loss": 0.0659, "theoretical_loss": 3.4187428768378894, "tokens_seen": 2307653632 }, { "epoch": 0.7, "learning_rate": 0.0003036430749478414, "loss": 0.0671, "theoretical_loss": 3.4187116640410955, "tokens_seen": 2307915776 }, { "epoch": 0.7, "learning_rate": 0.00030356283100625904, "loss": 0.0657, "theoretical_loss": 3.418680455781953, "tokens_seen": 2308177920 }, { "epoch": 0.7, "learning_rate": 0.00030348258706467665, "loss": 0.0665, "theoretical_loss": 3.4186492520592853, "tokens_seen": 2308440064 }, { "epoch": 0.7, "learning_rate": 0.0003034023431230942, "loss": 0.0707, "theoretical_loss": 3.4186180528719188, "tokens_seen": 2308702208 }, { "epoch": 0.7, "learning_rate": 0.0003033220991815118, "loss": 0.0684, "theoretical_loss": 3.41858685821868, "tokens_seen": 2308964352 }, { "epoch": 0.7, "learning_rate": 0.0003032418552399294, "loss": 0.0682, "theoretical_loss": 3.418555668098395, "tokens_seen": 2309226496 }, { "epoch": 0.7, "learning_rate": 0.00030316161129834694, "loss": 0.0683, "theoretical_loss": 3.41852448250989, "tokens_seen": 2309488640 }, { "epoch": 0.7, "learning_rate": 0.00030308136735676455, "loss": 0.0653, "theoretical_loss": 3.4184933014519925, "tokens_seen": 2309750784 }, { "epoch": 0.7, "learning_rate": 0.00030300112341518216, "loss": 0.065, "theoretical_loss": 3.4184621249235305, "tokens_seen": 2310012928 }, { "epoch": 0.7, "learning_rate": 0.0003029208794735998, "loss": 0.0659, "theoretical_loss": 3.418430952923332, "tokens_seen": 2310275072 }, { "epoch": 0.7, "learning_rate": 0.00030284063553201733, "loss": 0.0685, "theoretical_loss": 3.418399785450226, "tokens_seen": 2310537216 }, { "epoch": 0.7, "learning_rate": 0.00030276039159043495, "loss": 0.0652, "theoretical_loss": 3.418368622503041, "tokens_seen": 2310799360 }, { "epoch": 0.7, "learning_rate": 0.00030268014764885256, "loss": 0.0659, "theoretical_loss": 3.4183374640806066, "tokens_seen": 2311061504 }, { "epoch": 0.7, "learning_rate": 0.00030259990370727006, "loss": 0.0671, "theoretical_loss": 3.4183063101817526, "tokens_seen": 2311323648 }, { "epoch": 0.7, "learning_rate": 0.0003025196597656877, "loss": 0.0686, "theoretical_loss": 3.41827516080531, "tokens_seen": 2311585792 }, { "epoch": 0.7, "learning_rate": 0.0003024394158241053, "loss": 0.0698, "theoretical_loss": 3.418244015950108, "tokens_seen": 2311847936 }, { "epoch": 0.7, "learning_rate": 0.0003023591718825229, "loss": 0.0654, "theoretical_loss": 3.418212875614979, "tokens_seen": 2312110080 }, { "epoch": 0.7, "learning_rate": 0.00030227892794094046, "loss": 0.0663, "theoretical_loss": 3.4181817397987553, "tokens_seen": 2312372224 }, { "epoch": 0.7, "learning_rate": 0.00030219868399935807, "loss": 0.0669, "theoretical_loss": 3.4181506085002673, "tokens_seen": 2312634368 }, { "epoch": 0.7, "learning_rate": 0.0003021184400577757, "loss": 0.0668, "theoretical_loss": 3.4181194817183487, "tokens_seen": 2312896512 }, { "epoch": 0.7, "learning_rate": 0.0003020381961161932, "loss": 0.0669, "theoretical_loss": 3.418088359451832, "tokens_seen": 2313158656 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.0010467276442795992, "objective/train/docs_used": 842373, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.314192771911621, "objective/train/original_loss": 1.314192771911621, "objective/train/theoretical_loss": 3.41805724169955, "objective/train/tokens_used": 2333880800, "objective/train/value_avg": -0.005832672119140625, "objective/train/value_loss": 0.00015348766464740038, "objective/train/value_max": -2.467632293701172e-05, "objective/train/value_min": -0.3525390625, "objective/train/value_reward_corr": 0.6610346152446436, "objective/train/value_std": 0.0113677978515625, "objective/train/weight_avg": 1.0011154413223267, "objective/train/weighted_lm_loss": 1.3154722452163696, "objective/train/weights_max": 1.3709473609924316, "objective/train/weights_min": 0.37706345319747925, "theoretical_loss": 3.41805724169955, "tokens_seen": 2313420800 }, { "epoch": 0.7, "learning_rate": 0.0003019579521746108, "loss": 0.0679, "theoretical_loss": 3.41805724169955, "tokens_seen": 2313420800 }, { "epoch": 0.7, "learning_rate": 0.0003018777082330284, "loss": 0.0677, "theoretical_loss": 3.4180261284603373, "tokens_seen": 2313682944 }, { "epoch": 0.7, "learning_rate": 0.000301797464291446, "loss": 0.0693, "theoretical_loss": 3.417995019733028, "tokens_seen": 2313945088 }, { "epoch": 0.7, "learning_rate": 0.0003017172203498636, "loss": 0.0661, "theoretical_loss": 3.4179639155164567, "tokens_seen": 2314207232 }, { "epoch": 0.7, "learning_rate": 0.0003016369764082812, "loss": 0.0667, "theoretical_loss": 3.4179328158094586, "tokens_seen": 2314469376 }, { "epoch": 0.7, "learning_rate": 0.0003015567324666988, "loss": 0.0662, "theoretical_loss": 3.417901720610869, "tokens_seen": 2314731520 }, { "epoch": 0.7, "learning_rate": 0.0003014764885251163, "loss": 0.0642, "theoretical_loss": 3.417870629919524, "tokens_seen": 2314993664 }, { "epoch": 0.7, "learning_rate": 0.0003013962445835339, "loss": 0.0678, "theoretical_loss": 3.41783954373426, "tokens_seen": 2315255808 }, { "epoch": 0.7, "learning_rate": 0.00030131600064195154, "loss": 0.0655, "theoretical_loss": 3.4178084620539138, "tokens_seen": 2315517952 }, { "epoch": 0.7, "learning_rate": 0.0003012357567003691, "loss": 0.0656, "theoretical_loss": 3.4177773848773225, "tokens_seen": 2315780096 }, { "epoch": 0.7, "learning_rate": 0.0003011555127587867, "loss": 0.068, "theoretical_loss": 3.4177463122033243, "tokens_seen": 2316042240 }, { "epoch": 0.7, "learning_rate": 0.0003010752688172043, "loss": 0.0672, "theoretical_loss": 3.4177152440307568, "tokens_seen": 2316304384 }, { "epoch": 0.7, "learning_rate": 0.00030099502487562194, "loss": 0.0672, "theoretical_loss": 3.417684180358459, "tokens_seen": 2316566528 }, { "epoch": 0.7, "learning_rate": 0.0003009147809340395, "loss": 0.0688, "theoretical_loss": 3.417653121185269, "tokens_seen": 2316828672 }, { "epoch": 0.7, "learning_rate": 0.00030083453699245705, "loss": 0.0704, "theoretical_loss": 3.4176220665100274, "tokens_seen": 2317090816 }, { "epoch": 0.7, "learning_rate": 0.00030075429305087466, "loss": 0.0681, "theoretical_loss": 3.4175910163315733, "tokens_seen": 2317352960 }, { "epoch": 0.7, "learning_rate": 0.0003006740491092922, "loss": 0.0684, "theoretical_loss": 3.417559970648747, "tokens_seen": 2317615104 }, { "epoch": 0.7, "learning_rate": 0.00030059380516770984, "loss": 0.0691, "theoretical_loss": 3.4175289294603894, "tokens_seen": 2317877248 }, { "epoch": 0.7, "learning_rate": 0.00030051356122612745, "loss": 0.069, "theoretical_loss": 3.4174978927653417, "tokens_seen": 2318139392 }, { "epoch": 0.7, "learning_rate": 0.00030043331728454506, "loss": 0.0684, "theoretical_loss": 3.4174668605624454, "tokens_seen": 2318401536 }, { "epoch": 0.7, "learning_rate": 0.0003003530733429626, "loss": 0.0667, "theoretical_loss": 3.4174358328505425, "tokens_seen": 2318663680 }, { "epoch": 0.7, "learning_rate": 0.0003002728294013802, "loss": 0.067, "theoretical_loss": 3.4174048096284757, "tokens_seen": 2318925824 }, { "epoch": 0.7, "learning_rate": 0.0003001925854597978, "loss": 0.0674, "theoretical_loss": 3.4173737908950876, "tokens_seen": 2319187968 }, { "epoch": 0.7, "learning_rate": 0.00030011234151821535, "loss": 0.0672, "theoretical_loss": 3.417342776649221, "tokens_seen": 2319450112 }, { "epoch": 0.7, "learning_rate": 0.00030003209757663296, "loss": 0.0698, "theoretical_loss": 3.4173117668897204, "tokens_seen": 2319712256 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.0005700485780835152, "objective/train/docs_used": 844817, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.1989997625350952, "objective/train/original_loss": 1.1989996433258057, "objective/train/theoretical_loss": 3.4172807616154293, "objective/train/tokens_used": 2340434400, "objective/train/value_avg": -0.00766754150390625, "objective/train/value_loss": 0.00026566028827801347, "objective/train/value_max": -1.1861324310302734e-05, "objective/train/value_min": -0.97998046875, "objective/train/value_reward_corr": 0.7219315572173486, "objective/train/value_std": 0.01800537109375, "objective/train/weight_avg": 1.0006872415542603, "objective/train/weighted_lm_loss": 1.1993837356567383, "objective/train/weights_max": 2.4944355487823486, "objective/train/weights_min": 0.14852790534496307, "theoretical_loss": 3.4172807616154293, "tokens_seen": 2319974400 }, { "epoch": 0.7, "learning_rate": 0.0002999518536350506, "loss": 0.0653, "theoretical_loss": 3.4172807616154293, "tokens_seen": 2319974400 }, { "epoch": 0.7, "learning_rate": 0.0002998716096934682, "loss": 0.0679, "theoretical_loss": 3.417249760825193, "tokens_seen": 2320236544 }, { "epoch": 0.7, "learning_rate": 0.00029979136575188574, "loss": 0.065, "theoretical_loss": 3.417218764517856, "tokens_seen": 2320498688 }, { "epoch": 0.7, "learning_rate": 0.00029971112181030336, "loss": 0.0688, "theoretical_loss": 3.4171877726922633, "tokens_seen": 2320760832 }, { "epoch": 0.7, "learning_rate": 0.0002996308778687209, "loss": 0.067, "theoretical_loss": 3.417156785347262, "tokens_seen": 2321022976 }, { "epoch": 0.7, "learning_rate": 0.0002995506339271385, "loss": 0.0672, "theoretical_loss": 3.4171258024816975, "tokens_seen": 2321285120 }, { "epoch": 0.7, "learning_rate": 0.0002994703899855561, "loss": 0.0676, "theoretical_loss": 3.4170948240944163, "tokens_seen": 2321547264 }, { "epoch": 0.7, "learning_rate": 0.0002993901460439737, "loss": 0.0667, "theoretical_loss": 3.4170638501842663, "tokens_seen": 2321809408 }, { "epoch": 0.7, "learning_rate": 0.00029930990210239126, "loss": 0.0677, "theoretical_loss": 3.417032880750094, "tokens_seen": 2322071552 }, { "epoch": 0.7, "learning_rate": 0.00029922965816080887, "loss": 0.0658, "theoretical_loss": 3.417001915790749, "tokens_seen": 2322333696 }, { "epoch": 0.7, "learning_rate": 0.0002991494142192265, "loss": 0.0648, "theoretical_loss": 3.416970955305078, "tokens_seen": 2322595840 }, { "epoch": 0.7, "learning_rate": 0.00029906917027764404, "loss": 0.0676, "theoretical_loss": 3.416939999291931, "tokens_seen": 2322857984 }, { "epoch": 0.7, "learning_rate": 0.0002989889263360616, "loss": 0.0681, "theoretical_loss": 3.416909047750157, "tokens_seen": 2323120128 }, { "epoch": 0.7, "learning_rate": 0.0002989086823944792, "loss": 0.0674, "theoretical_loss": 3.4168781006786055, "tokens_seen": 2323382272 }, { "epoch": 0.7, "learning_rate": 0.0002988284384528968, "loss": 0.0684, "theoretical_loss": 3.4168471580761266, "tokens_seen": 2323644416 }, { "epoch": 0.7, "learning_rate": 0.0002987481945113144, "loss": 0.0672, "theoretical_loss": 3.416816219941571, "tokens_seen": 2323906560 }, { "epoch": 0.7, "learning_rate": 0.000298667950569732, "loss": 0.0661, "theoretical_loss": 3.4167852862737895, "tokens_seen": 2324168704 }, { "epoch": 0.7, "learning_rate": 0.0002985877066281496, "loss": 0.068, "theoretical_loss": 3.4167543570716337, "tokens_seen": 2324430848 }, { "epoch": 0.7, "learning_rate": 0.00029850746268656717, "loss": 0.066, "theoretical_loss": 3.416723432333956, "tokens_seen": 2324692992 }, { "epoch": 0.7, "learning_rate": 0.0002984272187449847, "loss": 0.0662, "theoretical_loss": 3.416692512059607, "tokens_seen": 2324955136 }, { "epoch": 0.7, "learning_rate": 0.00029834697480340234, "loss": 0.065, "theoretical_loss": 3.416661596247441, "tokens_seen": 2325217280 }, { "epoch": 0.7, "learning_rate": 0.00029826673086181995, "loss": 0.0694, "theoretical_loss": 3.41663068489631, "tokens_seen": 2325479424 }, { "epoch": 0.7, "learning_rate": 0.0002981864869202375, "loss": 0.0656, "theoretical_loss": 3.4165997780050685, "tokens_seen": 2325741568 }, { "epoch": 0.7, "learning_rate": 0.0002981062429786551, "loss": 0.0675, "theoretical_loss": 3.41656887557257, "tokens_seen": 2326003712 }, { "epoch": 0.7, "learning_rate": 0.00029802599903707273, "loss": 0.0687, "theoretical_loss": 3.416537977597668, "tokens_seen": 2326265856 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.0007894880254752934, "objective/train/docs_used": 847012, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3645339012145996, "objective/train/original_loss": 1.3645339012145996, "objective/train/theoretical_loss": 3.4165070840792184, "objective/train/tokens_used": 2346988000, "objective/train/value_avg": -0.00891876220703125, "objective/train/value_loss": 0.0002483201096765697, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.890625, "objective/train/value_reward_corr": 0.7192212149918833, "objective/train/value_std": 0.0171661376953125, "objective/train/weight_avg": 1.0008996725082397, "objective/train/weighted_lm_loss": 1.3655369281768799, "objective/train/weights_max": 1.242430329322815, "objective/train/weights_min": 0.3732339143753052, "theoretical_loss": 3.4165070840792184, "tokens_seen": 2326528000 }, { "epoch": 0.71, "learning_rate": 0.00029794575509549035, "loss": 0.0674, "theoretical_loss": 3.4165070840792184, "tokens_seen": 2326528000 }, { "epoch": 0.71, "learning_rate": 0.00029786551115390785, "loss": 0.0663, "theoretical_loss": 3.4164761950160765, "tokens_seen": 2326790144 }, { "epoch": 0.71, "learning_rate": 0.00029778526721232546, "loss": 0.0661, "theoretical_loss": 3.4164453104070973, "tokens_seen": 2327052288 }, { "epoch": 0.71, "learning_rate": 0.0002977050232707431, "loss": 0.0645, "theoretical_loss": 3.416414430251137, "tokens_seen": 2327314432 }, { "epoch": 0.71, "learning_rate": 0.00029762477932916063, "loss": 0.0678, "theoretical_loss": 3.4163835545470524, "tokens_seen": 2327576576 }, { "epoch": 0.71, "learning_rate": 0.00029754453538757825, "loss": 0.0616, "theoretical_loss": 3.4163526832937, "tokens_seen": 2327838720 }, { "epoch": 0.71, "learning_rate": 0.00029746429144599586, "loss": 0.0671, "theoretical_loss": 3.4163218164899374, "tokens_seen": 2328100864 }, { "epoch": 0.71, "learning_rate": 0.00029738404750441347, "loss": 0.0663, "theoretical_loss": 3.416290954134622, "tokens_seen": 2328363008 }, { "epoch": 0.71, "learning_rate": 0.000297303803562831, "loss": 0.068, "theoretical_loss": 3.4162600962266128, "tokens_seen": 2328625152 }, { "epoch": 0.71, "learning_rate": 0.0002972235596212486, "loss": 0.0672, "theoretical_loss": 3.4162292427647674, "tokens_seen": 2328887296 }, { "epoch": 0.71, "learning_rate": 0.0002971433156796662, "loss": 0.0665, "theoretical_loss": 3.4161983937479454, "tokens_seen": 2329149440 }, { "epoch": 0.71, "learning_rate": 0.00029706307173808376, "loss": 0.0658, "theoretical_loss": 3.4161675491750056, "tokens_seen": 2329411584 }, { "epoch": 0.71, "learning_rate": 0.0002969828277965014, "loss": 0.0683, "theoretical_loss": 3.416136709044809, "tokens_seen": 2329673728 }, { "epoch": 0.71, "learning_rate": 0.000296902583854919, "loss": 0.0651, "theoretical_loss": 3.416105873356215, "tokens_seen": 2329935872 }, { "epoch": 0.71, "learning_rate": 0.00029682233991333654, "loss": 0.0665, "theoretical_loss": 3.4160750421080843, "tokens_seen": 2330198016 }, { "epoch": 0.71, "learning_rate": 0.0002967420959717541, "loss": 0.066, "theoretical_loss": 3.4160442152992783, "tokens_seen": 2330460160 }, { "epoch": 0.71, "learning_rate": 0.0002966618520301717, "loss": 0.0674, "theoretical_loss": 3.4160133929286585, "tokens_seen": 2330722304 }, { "epoch": 0.71, "learning_rate": 0.00029658160808858933, "loss": 0.0671, "theoretical_loss": 3.4159825749950867, "tokens_seen": 2330984448 }, { "epoch": 0.71, "learning_rate": 0.0002965013641470069, "loss": 0.0684, "theoretical_loss": 3.4159517614974257, "tokens_seen": 2331246592 }, { "epoch": 0.71, "learning_rate": 0.0002964211202054245, "loss": 0.0668, "theoretical_loss": 3.4159209524345373, "tokens_seen": 2331508736 }, { "epoch": 0.71, "learning_rate": 0.0002963408762638421, "loss": 0.0669, "theoretical_loss": 3.4158901478052863, "tokens_seen": 2331770880 }, { "epoch": 0.71, "learning_rate": 0.00029626063232225967, "loss": 0.0692, "theoretical_loss": 3.4158593476085346, "tokens_seen": 2332033024 }, { "epoch": 0.71, "learning_rate": 0.0002961803883806773, "loss": 0.0664, "theoretical_loss": 3.4158285518431475, "tokens_seen": 2332295168 }, { "epoch": 0.71, "learning_rate": 0.00029610014443909484, "loss": 0.0689, "theoretical_loss": 3.4157977605079894, "tokens_seen": 2332557312 }, { "epoch": 0.71, "learning_rate": 0.00029601990049751245, "loss": 0.0634, "theoretical_loss": 3.415766973601924, "tokens_seen": 2332819456 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.00032847325201146305, "objective/train/docs_used": 849464, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3709079027175903, "objective/train/original_loss": 1.3709079027175903, "objective/train/theoretical_loss": 3.415736191123818, "objective/train/tokens_used": 2353541600, "objective/train/value_avg": -0.00714111328125, "objective/train/value_loss": 0.00020859837241005152, "objective/train/value_max": -2.8848648071289062e-05, "objective/train/value_min": -0.58837890625, "objective/train/value_reward_corr": 0.6817947286926539, "objective/train/value_std": 0.0138092041015625, "objective/train/weight_avg": 1.000423550605774, "objective/train/weighted_lm_loss": 1.3710265159606934, "objective/train/weights_max": 1.3413829803466797, "objective/train/weights_min": 0.3711100220680237, "theoretical_loss": 3.415736191123818, "tokens_seen": 2333081600 }, { "epoch": 0.71, "learning_rate": 0.00029593965655593, "loss": 0.0665, "theoretical_loss": 3.415736191123818, "tokens_seen": 2333081600 }, { "epoch": 0.71, "learning_rate": 0.0002958594126143476, "loss": 0.0673, "theoretical_loss": 3.4157054130725366, "tokens_seen": 2333343744 }, { "epoch": 0.71, "learning_rate": 0.00029577916867276524, "loss": 0.0687, "theoretical_loss": 3.415674639446946, "tokens_seen": 2333605888 }, { "epoch": 0.71, "learning_rate": 0.0002956989247311828, "loss": 0.0674, "theoretical_loss": 3.415643870245913, "tokens_seen": 2333868032 }, { "epoch": 0.71, "learning_rate": 0.0002956186807896004, "loss": 0.0683, "theoretical_loss": 3.4156131054683034, "tokens_seen": 2334130176 }, { "epoch": 0.71, "learning_rate": 0.00029553843684801797, "loss": 0.068, "theoretical_loss": 3.415582345112986, "tokens_seen": 2334392320 }, { "epoch": 0.71, "learning_rate": 0.0002954581929064356, "loss": 0.0716, "theoretical_loss": 3.415551589178828, "tokens_seen": 2334654464 }, { "epoch": 0.71, "learning_rate": 0.00029537794896485314, "loss": 0.0678, "theoretical_loss": 3.415520837664698, "tokens_seen": 2334916608 }, { "epoch": 0.71, "learning_rate": 0.00029529770502327075, "loss": 0.0659, "theoretical_loss": 3.415490090569464, "tokens_seen": 2335178752 }, { "epoch": 0.71, "learning_rate": 0.00029521746108168836, "loss": 0.0686, "theoretical_loss": 3.415459347891996, "tokens_seen": 2335440896 }, { "epoch": 0.71, "learning_rate": 0.0002951372171401059, "loss": 0.0658, "theoretical_loss": 3.4154286096311623, "tokens_seen": 2335703040 }, { "epoch": 0.71, "learning_rate": 0.00029505697319852353, "loss": 0.0644, "theoretical_loss": 3.4153978757858336, "tokens_seen": 2335965184 }, { "epoch": 0.71, "learning_rate": 0.00029497672925694115, "loss": 0.0661, "theoretical_loss": 3.41536714635488, "tokens_seen": 2336227328 }, { "epoch": 0.71, "learning_rate": 0.00029489648531535865, "loss": 0.0683, "theoretical_loss": 3.4153364213371726, "tokens_seen": 2336489472 }, { "epoch": 0.71, "learning_rate": 0.00029481624137377626, "loss": 0.0655, "theoretical_loss": 3.415305700731582, "tokens_seen": 2336751616 }, { "epoch": 0.71, "learning_rate": 0.0002947359974321939, "loss": 0.0683, "theoretical_loss": 3.41527498453698, "tokens_seen": 2337013760 }, { "epoch": 0.71, "learning_rate": 0.0002946557534906115, "loss": 0.0684, "theoretical_loss": 3.4152442727522385, "tokens_seen": 2337275904 }, { "epoch": 0.71, "learning_rate": 0.00029457550954902905, "loss": 0.0696, "theoretical_loss": 3.41521356537623, "tokens_seen": 2337538048 }, { "epoch": 0.71, "learning_rate": 0.00029449526560744666, "loss": 0.0668, "theoretical_loss": 3.415182862407827, "tokens_seen": 2337800192 }, { "epoch": 0.71, "learning_rate": 0.00029441502166586427, "loss": 0.0663, "theoretical_loss": 3.4151521638459035, "tokens_seen": 2338062336 }, { "epoch": 0.71, "learning_rate": 0.0002943347777242818, "loss": 0.0698, "theoretical_loss": 3.4151214696893324, "tokens_seen": 2338324480 }, { "epoch": 0.71, "learning_rate": 0.0002942545337826994, "loss": 0.0672, "theoretical_loss": 3.4150907799369876, "tokens_seen": 2338586624 }, { "epoch": 0.71, "learning_rate": 0.000294174289841117, "loss": 0.0677, "theoretical_loss": 3.4150600945877443, "tokens_seen": 2338848768 }, { "epoch": 0.71, "learning_rate": 0.0002940940458995346, "loss": 0.0691, "theoretical_loss": 3.415029413640477, "tokens_seen": 2339110912 }, { "epoch": 0.71, "learning_rate": 0.00029401380195795217, "loss": 0.0672, "theoretical_loss": 3.414998737094061, "tokens_seen": 2339373056 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.00047332874964922667, "objective/train/docs_used": 851815, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3133671283721924, "objective/train/original_loss": 1.3133671283721924, "objective/train/theoretical_loss": 3.414968064947372, "objective/train/tokens_used": 2360095200, "objective/train/value_avg": -0.0105438232421875, "objective/train/value_loss": 0.0004383322084322572, "objective/train/value_max": -3.218650817871094e-05, "objective/train/value_min": -0.97802734375, "objective/train/value_reward_corr": 0.7493675175185559, "objective/train/value_std": 0.0269317626953125, "objective/train/weight_avg": 1.0006917715072632, "objective/train/weighted_lm_loss": 1.313127875328064, "objective/train/weights_max": 2.2706074714660645, "objective/train/weights_min": 0.37235790491104126, "theoretical_loss": 3.414968064947372, "tokens_seen": 2339635200 }, { "epoch": 0.71, "learning_rate": 0.0002939335580163698, "loss": 0.0672, "theoretical_loss": 3.414968064947372, "tokens_seen": 2339635200 }, { "epoch": 0.71, "learning_rate": 0.0002938533140747874, "loss": 0.0661, "theoretical_loss": 3.4149373971992856, "tokens_seen": 2339897344 }, { "epoch": 0.71, "learning_rate": 0.0002937730701332049, "loss": 0.0678, "theoretical_loss": 3.4149067338486794, "tokens_seen": 2340159488 }, { "epoch": 0.71, "learning_rate": 0.0002936928261916225, "loss": 0.0665, "theoretical_loss": 3.4148760748944293, "tokens_seen": 2340421632 }, { "epoch": 0.71, "learning_rate": 0.0002936125822500401, "loss": 0.0661, "theoretical_loss": 3.4148454203354133, "tokens_seen": 2340683776 }, { "epoch": 0.71, "learning_rate": 0.00029353233830845774, "loss": 0.0673, "theoretical_loss": 3.4148147701705094, "tokens_seen": 2340945920 }, { "epoch": 0.71, "learning_rate": 0.0002934520943668753, "loss": 0.0669, "theoretical_loss": 3.414784124398595, "tokens_seen": 2341208064 }, { "epoch": 0.71, "learning_rate": 0.0002933718504252929, "loss": 0.0688, "theoretical_loss": 3.4147534830185493, "tokens_seen": 2341470208 }, { "epoch": 0.71, "learning_rate": 0.0002932916064837105, "loss": 0.0676, "theoretical_loss": 3.414722846029251, "tokens_seen": 2341732352 }, { "epoch": 0.71, "learning_rate": 0.0002932113625421281, "loss": 0.0695, "theoretical_loss": 3.4146922134295794, "tokens_seen": 2341994496 }, { "epoch": 0.71, "learning_rate": 0.00029313111860054564, "loss": 0.0686, "theoretical_loss": 3.414661585218415, "tokens_seen": 2342256640 }, { "epoch": 0.71, "learning_rate": 0.00029305087465896325, "loss": 0.0682, "theoretical_loss": 3.4146309613946366, "tokens_seen": 2342518784 }, { "epoch": 0.71, "learning_rate": 0.0002929706307173808, "loss": 0.0677, "theoretical_loss": 3.4146003419571267, "tokens_seen": 2342780928 }, { "epoch": 0.71, "learning_rate": 0.0002928903867757984, "loss": 0.0682, "theoretical_loss": 3.414569726904765, "tokens_seen": 2343043072 }, { "epoch": 0.71, "learning_rate": 0.00029281014283421604, "loss": 0.0672, "theoretical_loss": 3.414539116236434, "tokens_seen": 2343305216 }, { "epoch": 0.71, "learning_rate": 0.00029272989889263365, "loss": 0.0662, "theoretical_loss": 3.4145085099510144, "tokens_seen": 2343567360 }, { "epoch": 0.71, "learning_rate": 0.0002926496549510512, "loss": 0.0674, "theoretical_loss": 3.4144779080473895, "tokens_seen": 2343829504 }, { "epoch": 0.71, "learning_rate": 0.00029256941100946877, "loss": 0.0649, "theoretical_loss": 3.4144473105244413, "tokens_seen": 2344091648 }, { "epoch": 0.71, "learning_rate": 0.0002924891670678864, "loss": 0.0668, "theoretical_loss": 3.4144167173810533, "tokens_seen": 2344353792 }, { "epoch": 0.71, "learning_rate": 0.00029240892312630394, "loss": 0.0679, "theoretical_loss": 3.414386128616109, "tokens_seen": 2344615936 }, { "epoch": 0.71, "learning_rate": 0.00029232867918472155, "loss": 0.0685, "theoretical_loss": 3.414355544228492, "tokens_seen": 2344878080 }, { "epoch": 0.71, "learning_rate": 0.00029224843524313916, "loss": 0.0697, "theoretical_loss": 3.414324964217087, "tokens_seen": 2345140224 }, { "epoch": 0.71, "learning_rate": 0.0002921681913015568, "loss": 0.0657, "theoretical_loss": 3.414294388580779, "tokens_seen": 2345402368 }, { "epoch": 0.71, "learning_rate": 0.00029208794735997433, "loss": 0.0662, "theoretical_loss": 3.4142638173184525, "tokens_seen": 2345664512 }, { "epoch": 0.71, "learning_rate": 0.0002920077034183919, "loss": 0.0662, "theoretical_loss": 3.4142332504289934, "tokens_seen": 2345926656 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.0004487581318244338, "objective/train/docs_used": 854185, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4248061180114746, "objective/train/original_loss": 1.4248061180114746, "objective/train/theoretical_loss": 3.414202687911288, "objective/train/tokens_used": 2366648800, "objective/train/value_avg": -0.00934600830078125, "objective/train/value_loss": 0.00034865766065195203, "objective/train/value_max": -4.83393669128418e-05, "objective/train/value_min": -0.97119140625, "objective/train/value_reward_corr": 0.7621885055068649, "objective/train/value_std": 0.0230865478515625, "objective/train/weight_avg": 1.0006086826324463, "objective/train/weighted_lm_loss": 1.4253432750701904, "objective/train/weights_max": 1.7495906352996826, "objective/train/weights_min": 0.3696533143520355, "theoretical_loss": 3.414202687911288, "tokens_seen": 2346188800 }, { "epoch": 0.71, "learning_rate": 0.0002919274594768095, "loss": 0.0682, "theoretical_loss": 3.414202687911288, "tokens_seen": 2346188800 }, { "epoch": 0.71, "learning_rate": 0.00029184721553522706, "loss": 0.0673, "theoretical_loss": 3.414172129764222, "tokens_seen": 2346450944 }, { "epoch": 0.71, "learning_rate": 0.0002917669715936447, "loss": 0.0682, "theoretical_loss": 3.414141575986682, "tokens_seen": 2346713088 }, { "epoch": 0.71, "learning_rate": 0.0002916867276520623, "loss": 0.0669, "theoretical_loss": 3.4141110265775563, "tokens_seen": 2346975232 }, { "epoch": 0.71, "learning_rate": 0.0002916064837104799, "loss": 0.0665, "theoretical_loss": 3.414080481535732, "tokens_seen": 2347237376 }, { "epoch": 0.71, "learning_rate": 0.00029152623976889746, "loss": 0.0644, "theoretical_loss": 3.414049940860097, "tokens_seen": 2347499520 }, { "epoch": 0.71, "learning_rate": 0.00029144599582731507, "loss": 0.0666, "theoretical_loss": 3.4140194045495393, "tokens_seen": 2347761664 }, { "epoch": 0.71, "learning_rate": 0.00029136575188573263, "loss": 0.066, "theoretical_loss": 3.413988872602949, "tokens_seen": 2348023808 }, { "epoch": 0.71, "learning_rate": 0.0002912855079441502, "loss": 0.066, "theoretical_loss": 3.4139583450192137, "tokens_seen": 2348285952 }, { "epoch": 0.71, "learning_rate": 0.0002912052640025678, "loss": 0.0657, "theoretical_loss": 3.413927821797224, "tokens_seen": 2348548096 }, { "epoch": 0.71, "learning_rate": 0.0002911250200609854, "loss": 0.0681, "theoretical_loss": 3.41389730293587, "tokens_seen": 2348810240 }, { "epoch": 0.71, "learning_rate": 0.00029104477611940297, "loss": 0.0651, "theoretical_loss": 3.413866788434042, "tokens_seen": 2349072384 }, { "epoch": 0.71, "learning_rate": 0.0002909645321778206, "loss": 0.068, "theoretical_loss": 3.413836278290631, "tokens_seen": 2349334528 }, { "epoch": 0.71, "learning_rate": 0.0002908842882362382, "loss": 0.0673, "theoretical_loss": 3.4138057725045274, "tokens_seen": 2349596672 }, { "epoch": 0.71, "learning_rate": 0.00029080404429465576, "loss": 0.0698, "theoretical_loss": 3.413775271074624, "tokens_seen": 2349858816 }, { "epoch": 0.71, "learning_rate": 0.0002907238003530733, "loss": 0.0672, "theoretical_loss": 3.413744773999812, "tokens_seen": 2350120960 }, { "epoch": 0.71, "learning_rate": 0.0002906435564114909, "loss": 0.0654, "theoretical_loss": 3.413714281278985, "tokens_seen": 2350383104 }, { "epoch": 0.71, "learning_rate": 0.00029056331246990854, "loss": 0.0674, "theoretical_loss": 3.413683792911035, "tokens_seen": 2350645248 }, { "epoch": 0.71, "learning_rate": 0.0002904830685283261, "loss": 0.067, "theoretical_loss": 3.413653308894855, "tokens_seen": 2350907392 }, { "epoch": 0.71, "learning_rate": 0.0002904028245867437, "loss": 0.0658, "theoretical_loss": 3.4136228292293396, "tokens_seen": 2351169536 }, { "epoch": 0.71, "learning_rate": 0.0002903225806451613, "loss": 0.0671, "theoretical_loss": 3.4135923539133826, "tokens_seen": 2351431680 }, { "epoch": 0.71, "learning_rate": 0.00029024233670357894, "loss": 0.067, "theoretical_loss": 3.413561882945878, "tokens_seen": 2351693824 }, { "epoch": 0.71, "learning_rate": 0.00029016209276199644, "loss": 0.0659, "theoretical_loss": 3.4135314163257213, "tokens_seen": 2351955968 }, { "epoch": 0.71, "learning_rate": 0.00029008184882041405, "loss": 0.0674, "theoretical_loss": 3.4135009540518073, "tokens_seen": 2352218112 }, { "epoch": 0.71, "learning_rate": 0.00029000160487883166, "loss": 0.0657, "theoretical_loss": 3.4134704961230327, "tokens_seen": 2352480256 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.0002187528443755582, "objective/train/docs_used": 856640, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3601878881454468, "objective/train/original_loss": 1.3601877689361572, "objective/train/theoretical_loss": 3.4134400425382925, "objective/train/tokens_used": 2373202400, "objective/train/value_avg": -0.0091094970703125, "objective/train/value_loss": 0.00019813631661236286, "objective/train/value_max": -4.398822784423828e-05, "objective/train/value_min": -0.423095703125, "objective/train/value_reward_corr": 0.7711988085987357, "objective/train/value_std": 0.0168914794921875, "objective/train/weight_avg": 1.0003117322921753, "objective/train/weighted_lm_loss": 1.3602980375289917, "objective/train/weights_max": 1.232275366783142, "objective/train/weights_min": 0.3712516129016876, "theoretical_loss": 3.4134400425382925, "tokens_seen": 2352742400 }, { "epoch": 0.71, "learning_rate": 0.0002899213609372492, "loss": 0.0671, "theoretical_loss": 3.4134400425382925, "tokens_seen": 2352742400 }, { "epoch": 0.71, "learning_rate": 0.00028984111699566684, "loss": 0.0668, "theoretical_loss": 3.4134095932964836, "tokens_seen": 2353004544 }, { "epoch": 0.71, "learning_rate": 0.00028976087305408445, "loss": 0.068, "theoretical_loss": 3.413379148396503, "tokens_seen": 2353266688 }, { "epoch": 0.71, "learning_rate": 0.00028968062911250206, "loss": 0.0653, "theoretical_loss": 3.413348707837248, "tokens_seen": 2353528832 }, { "epoch": 0.71, "learning_rate": 0.00028960038517091957, "loss": 0.0667, "theoretical_loss": 3.4133182716176167, "tokens_seen": 2353790976 }, { "epoch": 0.71, "learning_rate": 0.0002895201412293372, "loss": 0.0651, "theoretical_loss": 3.4132878397365065, "tokens_seen": 2354053120 }, { "epoch": 0.71, "learning_rate": 0.0002894398972877548, "loss": 0.0665, "theoretical_loss": 3.4132574121928165, "tokens_seen": 2354315264 }, { "epoch": 0.71, "learning_rate": 0.00028935965334617235, "loss": 0.0661, "theoretical_loss": 3.413226988985446, "tokens_seen": 2354577408 }, { "epoch": 0.71, "learning_rate": 0.00028927940940458996, "loss": 0.0651, "theoretical_loss": 3.4131965701132927, "tokens_seen": 2354839552 }, { "epoch": 0.71, "learning_rate": 0.0002891991654630076, "loss": 0.0641, "theoretical_loss": 3.4131661555752584, "tokens_seen": 2355101696 }, { "epoch": 0.71, "learning_rate": 0.0002891189215214252, "loss": 0.0663, "theoretical_loss": 3.413135745370242, "tokens_seen": 2355363840 }, { "epoch": 0.71, "learning_rate": 0.0002890386775798427, "loss": 0.0676, "theoretical_loss": 3.4131053394971445, "tokens_seen": 2355625984 }, { "epoch": 0.71, "learning_rate": 0.0002889584336382603, "loss": 0.0669, "theoretical_loss": 3.4130749379548666, "tokens_seen": 2355888128 }, { "epoch": 0.71, "learning_rate": 0.0002888781896966779, "loss": 0.0671, "theoretical_loss": 3.41304454074231, "tokens_seen": 2356150272 }, { "epoch": 0.71, "learning_rate": 0.0002887979457550955, "loss": 0.0664, "theoretical_loss": 3.4130141478583758, "tokens_seen": 2356412416 }, { "epoch": 0.71, "learning_rate": 0.0002887177018135131, "loss": 0.0662, "theoretical_loss": 3.4129837593019667, "tokens_seen": 2356674560 }, { "epoch": 0.71, "learning_rate": 0.0002886374578719307, "loss": 0.0672, "theoretical_loss": 3.412953375071985, "tokens_seen": 2356936704 }, { "epoch": 0.71, "learning_rate": 0.00028855721393034826, "loss": 0.0663, "theoretical_loss": 3.412922995167334, "tokens_seen": 2357198848 }, { "epoch": 0.71, "learning_rate": 0.00028847696998876587, "loss": 0.0663, "theoretical_loss": 3.4128926195869163, "tokens_seen": 2357460992 }, { "epoch": 0.71, "learning_rate": 0.00028839672604718343, "loss": 0.0635, "theoretical_loss": 3.4128622483296365, "tokens_seen": 2357723136 }, { "epoch": 0.71, "learning_rate": 0.00028831648210560104, "loss": 0.0674, "theoretical_loss": 3.412831881394399, "tokens_seen": 2357985280 }, { "epoch": 0.71, "learning_rate": 0.0002882362381640186, "loss": 0.0669, "theoretical_loss": 3.4128015187801073, "tokens_seen": 2358247424 }, { "epoch": 0.71, "learning_rate": 0.0002881559942224362, "loss": 0.0649, "theoretical_loss": 3.4127711604856668, "tokens_seen": 2358509568 }, { "epoch": 0.71, "learning_rate": 0.0002880757502808538, "loss": 0.0665, "theoretical_loss": 3.4127408065099827, "tokens_seen": 2358771712 }, { "epoch": 0.71, "learning_rate": 0.0002879955063392714, "loss": 0.0666, "theoretical_loss": 3.4127104568519613, "tokens_seen": 2359033856 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.0003526402579154819, "objective/train/docs_used": 858941, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2617586851119995, "objective/train/original_loss": 1.261758804321289, "objective/train/theoretical_loss": 3.412680111510508, "objective/train/tokens_used": 2379756000, "objective/train/value_avg": -0.00748443603515625, "objective/train/value_loss": 0.00020228726498316973, "objective/train/value_max": -3.272294998168945e-05, "objective/train/value_min": -0.41162109375, "objective/train/value_reward_corr": 0.7067248936371501, "objective/train/value_std": 0.0146942138671875, "objective/train/weight_avg": 1.0004441738128662, "objective/train/weighted_lm_loss": 1.2619435787200928, "objective/train/weights_max": 1.3642196655273438, "objective/train/weights_min": 0.36796626448631287, "theoretical_loss": 3.412680111510508, "tokens_seen": 2359296000 }, { "epoch": 0.71, "learning_rate": 0.000287915262397689, "loss": 0.0646, "theoretical_loss": 3.412680111510508, "tokens_seen": 2359296000 }, { "epoch": 0.72, "learning_rate": 0.00028783501845610655, "loss": 0.0677, "theoretical_loss": 3.41264977048453, "tokens_seen": 2359558144 }, { "epoch": 0.72, "learning_rate": 0.00028775477451452417, "loss": 0.0694, "theoretical_loss": 3.412619433772934, "tokens_seen": 2359820288 }, { "epoch": 0.72, "learning_rate": 0.0002876745305729417, "loss": 0.0676, "theoretical_loss": 3.412589101374627, "tokens_seen": 2360082432 }, { "epoch": 0.72, "learning_rate": 0.00028759428663135934, "loss": 0.0665, "theoretical_loss": 3.4125587732885174, "tokens_seen": 2360344576 }, { "epoch": 0.72, "learning_rate": 0.00028751404268977695, "loss": 0.0663, "theoretical_loss": 3.4125284495135126, "tokens_seen": 2360606720 }, { "epoch": 0.72, "learning_rate": 0.0002874337987481945, "loss": 0.0666, "theoretical_loss": 3.412498130048522, "tokens_seen": 2360868864 }, { "epoch": 0.72, "learning_rate": 0.0002873535548066121, "loss": 0.0681, "theoretical_loss": 3.412467814892454, "tokens_seen": 2361131008 }, { "epoch": 0.72, "learning_rate": 0.0002872733108650297, "loss": 0.0661, "theoretical_loss": 3.4124375040442176, "tokens_seen": 2361393152 }, { "epoch": 0.72, "learning_rate": 0.0002871930669234473, "loss": 0.0671, "theoretical_loss": 3.4124071975027235, "tokens_seen": 2361655296 }, { "epoch": 0.72, "learning_rate": 0.00028711282298186485, "loss": 0.0668, "theoretical_loss": 3.4123768952668807, "tokens_seen": 2361917440 }, { "epoch": 0.72, "learning_rate": 0.00028703257904028246, "loss": 0.0677, "theoretical_loss": 3.412346597335601, "tokens_seen": 2362179584 }, { "epoch": 0.72, "learning_rate": 0.0002869523350987001, "loss": 0.0684, "theoretical_loss": 3.4123163037077937, "tokens_seen": 2362441728 }, { "epoch": 0.72, "learning_rate": 0.00028687209115711763, "loss": 0.0659, "theoretical_loss": 3.4122860143823717, "tokens_seen": 2362703872 }, { "epoch": 0.72, "learning_rate": 0.00028679184721553525, "loss": 0.0649, "theoretical_loss": 3.4122557293582454, "tokens_seen": 2362966016 }, { "epoch": 0.72, "learning_rate": 0.00028671160327395286, "loss": 0.0663, "theoretical_loss": 3.412225448634328, "tokens_seen": 2363228160 }, { "epoch": 0.72, "learning_rate": 0.00028663135933237036, "loss": 0.0659, "theoretical_loss": 3.4121951722095316, "tokens_seen": 2363490304 }, { "epoch": 0.72, "learning_rate": 0.000286551115390788, "loss": 0.0687, "theoretical_loss": 3.412164900082769, "tokens_seen": 2363752448 }, { "epoch": 0.72, "learning_rate": 0.0002864708714492056, "loss": 0.065, "theoretical_loss": 3.412134632252953, "tokens_seen": 2364014592 }, { "epoch": 0.72, "learning_rate": 0.0002863906275076232, "loss": 0.0642, "theoretical_loss": 3.4121043687189982, "tokens_seen": 2364276736 }, { "epoch": 0.72, "learning_rate": 0.00028631038356604076, "loss": 0.0693, "theoretical_loss": 3.4120741094798186, "tokens_seen": 2364538880 }, { "epoch": 0.72, "learning_rate": 0.0002862301396244584, "loss": 0.0656, "theoretical_loss": 3.412043854534328, "tokens_seen": 2364801024 }, { "epoch": 0.72, "learning_rate": 0.000286149895682876, "loss": 0.0657, "theoretical_loss": 3.4120136038814417, "tokens_seen": 2365063168 }, { "epoch": 0.72, "learning_rate": 0.0002860696517412935, "loss": 0.0674, "theoretical_loss": 3.411983357520075, "tokens_seen": 2365325312 }, { "epoch": 0.72, "learning_rate": 0.0002859894077997111, "loss": 0.0659, "theoretical_loss": 3.4119531154491436, "tokens_seen": 2365587456 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.0008152511436492205, "objective/train/docs_used": 861421, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.1844955682754517, "objective/train/original_loss": 1.1844955682754517, "objective/train/theoretical_loss": 3.411922877667563, "objective/train/tokens_used": 2386309600, "objective/train/value_avg": -0.00860595703125, "objective/train/value_loss": 0.00024558245786465704, "objective/train/value_max": -2.4318695068359375e-05, "objective/train/value_min": -0.7431640625, "objective/train/value_reward_corr": 0.7812741348390095, "objective/train/value_std": 0.0189056396484375, "objective/train/weight_avg": 1.0009241104125977, "objective/train/weighted_lm_loss": 1.1853077411651611, "objective/train/weights_max": 1.2296350002288818, "objective/train/weights_min": 0.3681640923023224, "theoretical_loss": 3.411922877667563, "tokens_seen": 2365849600 }, { "epoch": 0.72, "learning_rate": 0.0002859091638581287, "loss": 0.0651, "theoretical_loss": 3.411922877667563, "tokens_seen": 2365849600 }, { "epoch": 0.72, "learning_rate": 0.00028582891991654633, "loss": 0.0663, "theoretical_loss": 3.4118926441742508, "tokens_seen": 2366111744 }, { "epoch": 0.72, "learning_rate": 0.0002857486759749639, "loss": 0.0655, "theoretical_loss": 3.4118624149681227, "tokens_seen": 2366373888 }, { "epoch": 0.72, "learning_rate": 0.0002856684320333815, "loss": 0.0651, "theoretical_loss": 3.4118321900480963, "tokens_seen": 2366636032 }, { "epoch": 0.72, "learning_rate": 0.0002855881880917991, "loss": 0.0659, "theoretical_loss": 3.4118019694130894, "tokens_seen": 2366898176 }, { "epoch": 0.72, "learning_rate": 0.00028550794415021667, "loss": 0.0692, "theoretical_loss": 3.41177175306202, "tokens_seen": 2367160320 }, { "epoch": 0.72, "learning_rate": 0.00028542770020863423, "loss": 0.0669, "theoretical_loss": 3.4117415409938068, "tokens_seen": 2367422464 }, { "epoch": 0.72, "learning_rate": 0.00028534745626705184, "loss": 0.0667, "theoretical_loss": 3.411711333207368, "tokens_seen": 2367684608 }, { "epoch": 0.72, "learning_rate": 0.00028526721232546945, "loss": 0.0659, "theoretical_loss": 3.411681129701623, "tokens_seen": 2367946752 }, { "epoch": 0.72, "learning_rate": 0.000285186968383887, "loss": 0.0649, "theoretical_loss": 3.411650930475491, "tokens_seen": 2368208896 }, { "epoch": 0.72, "learning_rate": 0.0002851067244423046, "loss": 0.066, "theoretical_loss": 3.4116207355278934, "tokens_seen": 2368471040 }, { "epoch": 0.72, "learning_rate": 0.00028502648050072224, "loss": 0.0669, "theoretical_loss": 3.411590544857749, "tokens_seen": 2368733184 }, { "epoch": 0.72, "learning_rate": 0.0002849462365591398, "loss": 0.0674, "theoretical_loss": 3.4115603584639795, "tokens_seen": 2368995328 }, { "epoch": 0.72, "learning_rate": 0.00028486599261755735, "loss": 0.0651, "theoretical_loss": 3.4115301763455057, "tokens_seen": 2369257472 }, { "epoch": 0.72, "learning_rate": 0.00028478574867597497, "loss": 0.0666, "theoretical_loss": 3.411499998501249, "tokens_seen": 2369519616 }, { "epoch": 0.72, "learning_rate": 0.0002847055047343925, "loss": 0.0668, "theoretical_loss": 3.4114698249301316, "tokens_seen": 2369781760 }, { "epoch": 0.72, "learning_rate": 0.00028462526079281014, "loss": 0.0657, "theoretical_loss": 3.411439655631076, "tokens_seen": 2370043904 }, { "epoch": 0.72, "learning_rate": 0.00028454501685122775, "loss": 0.0665, "theoretical_loss": 3.4114094906030052, "tokens_seen": 2370306048 }, { "epoch": 0.72, "learning_rate": 0.00028446477290964536, "loss": 0.0672, "theoretical_loss": 3.4113793298448414, "tokens_seen": 2370568192 }, { "epoch": 0.72, "learning_rate": 0.0002843845289680629, "loss": 0.0681, "theoretical_loss": 3.411349173355508, "tokens_seen": 2370830336 }, { "epoch": 0.72, "learning_rate": 0.0002843042850264805, "loss": 0.0691, "theoretical_loss": 3.4113190211339304, "tokens_seen": 2371092480 }, { "epoch": 0.72, "learning_rate": 0.0002842240410848981, "loss": 0.0677, "theoretical_loss": 3.4112888731790316, "tokens_seen": 2371354624 }, { "epoch": 0.72, "learning_rate": 0.00028414379714331565, "loss": 0.0662, "theoretical_loss": 3.4112587294897367, "tokens_seen": 2371616768 }, { "epoch": 0.72, "learning_rate": 0.00028406355320173326, "loss": 0.0666, "theoretical_loss": 3.4112285900649706, "tokens_seen": 2371878912 }, { "epoch": 0.72, "learning_rate": 0.0002839833092601509, "loss": 0.0649, "theoretical_loss": 3.411198454903659, "tokens_seen": 2372141056 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.00061773881316185, "objective/train/docs_used": 863878, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.301668643951416, "objective/train/original_loss": 1.3016687631607056, "objective/train/theoretical_loss": 3.4111683240047275, "objective/train/tokens_used": 2392863200, "objective/train/value_avg": -0.007083892822265625, "objective/train/value_loss": 0.00021664539235644042, "objective/train/value_max": -5.346536636352539e-05, "objective/train/value_min": -0.90478515625, "objective/train/value_reward_corr": 0.6975712737975295, "objective/train/value_std": 0.0143280029296875, "objective/train/weight_avg": 1.0007141828536987, "objective/train/weighted_lm_loss": 1.3028697967529297, "objective/train/weights_max": 1.1462258100509644, "objective/train/weights_min": 0.37651151418685913, "theoretical_loss": 3.4111683240047275, "tokens_seen": 2372403200 }, { "epoch": 0.72, "learning_rate": 0.0002839030653185685, "loss": 0.0669, "theoretical_loss": 3.4111683240047275, "tokens_seen": 2372403200 }, { "epoch": 0.72, "learning_rate": 0.00028382282137698605, "loss": 0.0625, "theoretical_loss": 3.4111381973671024, "tokens_seen": 2372665344 }, { "epoch": 0.72, "learning_rate": 0.00028374257743540366, "loss": 0.0653, "theoretical_loss": 3.4111080749897105, "tokens_seen": 2372927488 }, { "epoch": 0.72, "learning_rate": 0.0002836623334938212, "loss": 0.067, "theoretical_loss": 3.4110779568714786, "tokens_seen": 2373189632 }, { "epoch": 0.72, "learning_rate": 0.0002835820895522388, "loss": 0.0673, "theoretical_loss": 3.4110478430113345, "tokens_seen": 2373451776 }, { "epoch": 0.72, "learning_rate": 0.0002835018456106564, "loss": 0.0642, "theoretical_loss": 3.4110177334082055, "tokens_seen": 2373713920 }, { "epoch": 0.72, "learning_rate": 0.000283421601669074, "loss": 0.0683, "theoretical_loss": 3.4109876280610196, "tokens_seen": 2373976064 }, { "epoch": 0.72, "learning_rate": 0.0002833413577274916, "loss": 0.0624, "theoretical_loss": 3.4109575269687062, "tokens_seen": 2374238208 }, { "epoch": 0.72, "learning_rate": 0.00028326111378590917, "loss": 0.0681, "theoretical_loss": 3.4109274301301937, "tokens_seen": 2374500352 }, { "epoch": 0.72, "learning_rate": 0.0002831808698443268, "loss": 0.0647, "theoretical_loss": 3.410897337544412, "tokens_seen": 2374762496 }, { "epoch": 0.72, "learning_rate": 0.00028310062590274434, "loss": 0.0645, "theoretical_loss": 3.41086724921029, "tokens_seen": 2375024640 }, { "epoch": 0.72, "learning_rate": 0.0002830203819611619, "loss": 0.0669, "theoretical_loss": 3.410837165126758, "tokens_seen": 2375286784 }, { "epoch": 0.72, "learning_rate": 0.0002829401380195795, "loss": 0.0677, "theoretical_loss": 3.4108070852927472, "tokens_seen": 2375548928 }, { "epoch": 0.72, "learning_rate": 0.00028285989407799713, "loss": 0.0656, "theoretical_loss": 3.4107770097071874, "tokens_seen": 2375811072 }, { "epoch": 0.72, "learning_rate": 0.0002827796501364147, "loss": 0.065, "theoretical_loss": 3.410746938369011, "tokens_seen": 2376073216 }, { "epoch": 0.72, "learning_rate": 0.0002826994061948323, "loss": 0.068, "theoretical_loss": 3.410716871277149, "tokens_seen": 2376335360 }, { "epoch": 0.72, "learning_rate": 0.0002826191622532499, "loss": 0.065, "theoretical_loss": 3.4106868084305337, "tokens_seen": 2376597504 }, { "epoch": 0.72, "learning_rate": 0.00028253891831166747, "loss": 0.0676, "theoretical_loss": 3.4106567498280973, "tokens_seen": 2376859648 }, { "epoch": 0.72, "learning_rate": 0.00028245867437008503, "loss": 0.0673, "theoretical_loss": 3.410626695468773, "tokens_seen": 2377121792 }, { "epoch": 0.72, "learning_rate": 0.00028237843042850264, "loss": 0.0653, "theoretical_loss": 3.410596645351494, "tokens_seen": 2377383936 }, { "epoch": 0.72, "learning_rate": 0.00028229818648692025, "loss": 0.065, "theoretical_loss": 3.4105665994751933, "tokens_seen": 2377646080 }, { "epoch": 0.72, "learning_rate": 0.0002822179425453378, "loss": 0.0644, "theoretical_loss": 3.4105365578388054, "tokens_seen": 2377908224 }, { "epoch": 0.72, "learning_rate": 0.0002821376986037554, "loss": 0.0674, "theoretical_loss": 3.410506520441265, "tokens_seen": 2378170368 }, { "epoch": 0.72, "learning_rate": 0.00028205745466217304, "loss": 0.0661, "theoretical_loss": 3.410476487281506, "tokens_seen": 2378432512 }, { "epoch": 0.72, "learning_rate": 0.00028197721072059065, "loss": 0.0651, "theoretical_loss": 3.410446458358464, "tokens_seen": 2378694656 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.00045984104508534074, "objective/train/docs_used": 866449, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2284327745437622, "objective/train/original_loss": 1.2284328937530518, "objective/train/theoretical_loss": 3.4104164336710747, "objective/train/tokens_used": 2399416800, "objective/train/value_avg": -0.00720977783203125, "objective/train/value_loss": 0.00012368944589979947, "objective/train/value_max": -1.4483928680419922e-05, "objective/train/value_min": -0.37353515625, "objective/train/value_reward_corr": 0.754171488899678, "objective/train/value_std": 0.01329803466796875, "objective/train/weight_avg": 1.0005178451538086, "objective/train/weighted_lm_loss": 1.2287147045135498, "objective/train/weights_max": 1.3266090154647827, "objective/train/weights_min": 0.39287444949150085, "theoretical_loss": 3.4104164336710747, "tokens_seen": 2378956800 }, { "epoch": 0.72, "learning_rate": 0.00028189696677900815, "loss": 0.0651, "theoretical_loss": 3.4104164336710747, "tokens_seen": 2378956800 }, { "epoch": 0.72, "learning_rate": 0.00028181672283742577, "loss": 0.0649, "theoretical_loss": 3.4103864132182733, "tokens_seen": 2379218944 }, { "epoch": 0.72, "learning_rate": 0.0002817364788958434, "loss": 0.0668, "theoretical_loss": 3.410356396998997, "tokens_seen": 2379481088 }, { "epoch": 0.72, "learning_rate": 0.00028165623495426094, "loss": 0.0703, "theoretical_loss": 3.410326385012182, "tokens_seen": 2379743232 }, { "epoch": 0.72, "learning_rate": 0.00028157599101267855, "loss": 0.0666, "theoretical_loss": 3.4102963772567656, "tokens_seen": 2380005376 }, { "epoch": 0.72, "learning_rate": 0.00028149574707109616, "loss": 0.0665, "theoretical_loss": 3.4102663737316847, "tokens_seen": 2380267520 }, { "epoch": 0.72, "learning_rate": 0.0002814155031295138, "loss": 0.0649, "theoretical_loss": 3.410236374435878, "tokens_seen": 2380529664 }, { "epoch": 0.72, "learning_rate": 0.0002813352591879313, "loss": 0.0653, "theoretical_loss": 3.410206379368283, "tokens_seen": 2380791808 }, { "epoch": 0.72, "learning_rate": 0.0002812550152463489, "loss": 0.066, "theoretical_loss": 3.4101763885278387, "tokens_seen": 2381053952 }, { "epoch": 0.72, "learning_rate": 0.0002811747713047665, "loss": 0.0675, "theoretical_loss": 3.410146401913484, "tokens_seen": 2381316096 }, { "epoch": 0.72, "learning_rate": 0.00028109452736318406, "loss": 0.0654, "theoretical_loss": 3.410116419524158, "tokens_seen": 2381578240 }, { "epoch": 0.72, "learning_rate": 0.0002810142834216017, "loss": 0.0658, "theoretical_loss": 3.4100864413588003, "tokens_seen": 2381840384 }, { "epoch": 0.72, "learning_rate": 0.0002809340394800193, "loss": 0.0652, "theoretical_loss": 3.410056467416352, "tokens_seen": 2382102528 }, { "epoch": 0.72, "learning_rate": 0.0002808537955384369, "loss": 0.067, "theoretical_loss": 3.4100264976957524, "tokens_seen": 2382364672 }, { "epoch": 0.72, "learning_rate": 0.00028077355159685446, "loss": 0.0665, "theoretical_loss": 3.4099965321959433, "tokens_seen": 2382626816 }, { "epoch": 0.72, "learning_rate": 0.000280693307655272, "loss": 0.0656, "theoretical_loss": 3.4099665709158655, "tokens_seen": 2382888960 }, { "epoch": 0.72, "learning_rate": 0.00028061306371368963, "loss": 0.0659, "theoretical_loss": 3.409936613854461, "tokens_seen": 2383151104 }, { "epoch": 0.72, "learning_rate": 0.0002805328197721072, "loss": 0.0676, "theoretical_loss": 3.4099066610106714, "tokens_seen": 2383413248 }, { "epoch": 0.72, "learning_rate": 0.0002804525758305248, "loss": 0.0658, "theoretical_loss": 3.40987671238344, "tokens_seen": 2383675392 }, { "epoch": 0.72, "learning_rate": 0.0002803723318889424, "loss": 0.0661, "theoretical_loss": 3.4098467679717084, "tokens_seen": 2383937536 }, { "epoch": 0.72, "learning_rate": 0.00028029208794735997, "loss": 0.0666, "theoretical_loss": 3.4098168277744203, "tokens_seen": 2384199680 }, { "epoch": 0.72, "learning_rate": 0.0002802118440057776, "loss": 0.0667, "theoretical_loss": 3.4097868917905196, "tokens_seen": 2384461824 }, { "epoch": 0.72, "learning_rate": 0.00028013160006419514, "loss": 0.0654, "theoretical_loss": 3.40975696001895, "tokens_seen": 2384723968 }, { "epoch": 0.72, "learning_rate": 0.00028005135612261276, "loss": 0.0684, "theoretical_loss": 3.4097270324586555, "tokens_seen": 2384986112 }, { "epoch": 0.72, "learning_rate": 0.0002799711121810303, "loss": 0.0654, "theoretical_loss": 3.4096971091085817, "tokens_seen": 2385248256 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.0010171148460358381, "objective/train/docs_used": 868828, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3410003185272217, "objective/train/original_loss": 1.3410003185272217, "objective/train/theoretical_loss": 3.4096671899676725, "objective/train/tokens_used": 2405970400, "objective/train/value_avg": -0.00829315185546875, "objective/train/value_loss": 0.000190845355973579, "objective/train/value_max": -5.3048133850097656e-05, "objective/train/value_min": -0.82861328125, "objective/train/value_reward_corr": 0.6975594542862572, "objective/train/value_std": 0.01468658447265625, "objective/train/weight_avg": 1.0011066198349, "objective/train/weighted_lm_loss": 1.3416869640350342, "objective/train/weights_max": 1.9094114303588867, "objective/train/weights_min": 0.372306764125824, "theoretical_loss": 3.4096671899676725, "tokens_seen": 2385510400 }, { "epoch": 0.72, "learning_rate": 0.0002798908682394479, "loss": 0.0687, "theoretical_loss": 3.4096671899676725, "tokens_seen": 2385510400 }, { "epoch": 0.72, "learning_rate": 0.00027981062429786554, "loss": 0.0661, "theoretical_loss": 3.4096372750348745, "tokens_seen": 2385772544 }, { "epoch": 0.72, "learning_rate": 0.0002797303803562831, "loss": 0.0668, "theoretical_loss": 3.409607364309133, "tokens_seen": 2386034688 }, { "epoch": 0.72, "learning_rate": 0.0002796501364147007, "loss": 0.0699, "theoretical_loss": 3.4095774577893936, "tokens_seen": 2386296832 }, { "epoch": 0.72, "learning_rate": 0.00027956989247311827, "loss": 0.0655, "theoretical_loss": 3.4095475554746044, "tokens_seen": 2386558976 }, { "epoch": 0.72, "learning_rate": 0.0002794896485315359, "loss": 0.0677, "theoretical_loss": 3.409517657363711, "tokens_seen": 2386821120 }, { "epoch": 0.72, "learning_rate": 0.00027940940458995344, "loss": 0.0676, "theoretical_loss": 3.4094877634556617, "tokens_seen": 2387083264 }, { "epoch": 0.72, "learning_rate": 0.00027932916064837105, "loss": 0.0679, "theoretical_loss": 3.4094578737494037, "tokens_seen": 2387345408 }, { "epoch": 0.72, "learning_rate": 0.00027924891670678866, "loss": 0.0681, "theoretical_loss": 3.4094279882438854, "tokens_seen": 2387607552 }, { "epoch": 0.72, "learning_rate": 0.0002791686727652062, "loss": 0.0637, "theoretical_loss": 3.409398106938055, "tokens_seen": 2387869696 }, { "epoch": 0.72, "learning_rate": 0.00027908842882362384, "loss": 0.0679, "theoretical_loss": 3.4093682298308616, "tokens_seen": 2388131840 }, { "epoch": 0.72, "learning_rate": 0.00027900818488204145, "loss": 0.0664, "theoretical_loss": 3.4093383569212548, "tokens_seen": 2388393984 }, { "epoch": 0.72, "learning_rate": 0.00027892794094045895, "loss": 0.0666, "theoretical_loss": 3.4093084882081834, "tokens_seen": 2388656128 }, { "epoch": 0.72, "learning_rate": 0.00027884769699887657, "loss": 0.0718, "theoretical_loss": 3.4092786236905983, "tokens_seen": 2388918272 }, { "epoch": 0.72, "learning_rate": 0.0002787674530572942, "loss": 0.0649, "theoretical_loss": 3.4092487633674495, "tokens_seen": 2389180416 }, { "epoch": 0.72, "learning_rate": 0.0002786872091157118, "loss": 0.0668, "theoretical_loss": 3.4092189072376877, "tokens_seen": 2389442560 }, { "epoch": 0.72, "learning_rate": 0.00027860696517412935, "loss": 0.0681, "theoretical_loss": 3.4091890553002644, "tokens_seen": 2389704704 }, { "epoch": 0.72, "learning_rate": 0.00027852672123254696, "loss": 0.0677, "theoretical_loss": 3.4091592075541306, "tokens_seen": 2389966848 }, { "epoch": 0.72, "learning_rate": 0.0002784464772909646, "loss": 0.0655, "theoretical_loss": 3.4091293639982387, "tokens_seen": 2390228992 }, { "epoch": 0.72, "learning_rate": 0.0002783662333493821, "loss": 0.065, "theoretical_loss": 3.4090995246315403, "tokens_seen": 2390491136 }, { "epoch": 0.72, "learning_rate": 0.0002782859894077997, "loss": 0.0652, "theoretical_loss": 3.4090696894529886, "tokens_seen": 2390753280 }, { "epoch": 0.72, "learning_rate": 0.0002782057454662173, "loss": 0.0668, "theoretical_loss": 3.409039858461537, "tokens_seen": 2391015424 }, { "epoch": 0.72, "learning_rate": 0.0002781255015246349, "loss": 0.0677, "theoretical_loss": 3.4090100316561385, "tokens_seen": 2391277568 }, { "epoch": 0.72, "learning_rate": 0.0002780452575830525, "loss": 0.0672, "theoretical_loss": 3.408980209035746, "tokens_seen": 2391539712 }, { "epoch": 0.72, "learning_rate": 0.0002779650136414701, "loss": 0.067, "theoretical_loss": 3.4089503905993155, "tokens_seen": 2391801856 }, { "epoch": 0.72, "objective/train/advantage_avg": -0.0005384014802984893, "objective/train/docs_used": 871244, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4107896089553833, "objective/train/original_loss": 1.4107894897460938, "objective/train/theoretical_loss": 3.4089205763458, "objective/train/tokens_used": 2412524000, "objective/train/value_avg": -0.009735107421875, "objective/train/value_loss": 0.00018544522754382342, "objective/train/value_max": -3.2961368560791016e-05, "objective/train/value_min": -0.63037109375, "objective/train/value_reward_corr": 0.8186489105089638, "objective/train/value_std": 0.0194091796875, "objective/train/weight_avg": 0.9995511770248413, "objective/train/weighted_lm_loss": 1.4093742370605469, "objective/train/weights_max": 1.6551741361618042, "objective/train/weights_min": 0.36919885873794556, "theoretical_loss": 3.4089205763458, "tokens_seen": 2392064000 }, { "epoch": 0.72, "learning_rate": 0.0002778847696998877, "loss": 0.0674, "theoretical_loss": 3.4089205763458, "tokens_seen": 2392064000 }, { "epoch": 0.72, "learning_rate": 0.0002778045257583052, "loss": 0.0676, "theoretical_loss": 3.408890766274155, "tokens_seen": 2392326144 }, { "epoch": 0.73, "learning_rate": 0.0002777242818167228, "loss": 0.0669, "theoretical_loss": 3.408860960383336, "tokens_seen": 2392588288 }, { "epoch": 0.73, "learning_rate": 0.00027764403787514043, "loss": 0.066, "theoretical_loss": 3.4088311586722986, "tokens_seen": 2392850432 }, { "epoch": 0.73, "learning_rate": 0.00027756379393355804, "loss": 0.0665, "theoretical_loss": 3.4088013611399983, "tokens_seen": 2393112576 }, { "epoch": 0.73, "learning_rate": 0.0002774835499919756, "loss": 0.0671, "theoretical_loss": 3.408771567785392, "tokens_seen": 2393374720 }, { "epoch": 0.73, "learning_rate": 0.0002774033060503932, "loss": 0.0667, "theoretical_loss": 3.4087417786074363, "tokens_seen": 2393636864 }, { "epoch": 0.73, "learning_rate": 0.0002773230621088108, "loss": 0.0678, "theoretical_loss": 3.4087119936050887, "tokens_seen": 2393899008 }, { "epoch": 0.73, "learning_rate": 0.0002772428181672284, "loss": 0.0675, "theoretical_loss": 3.4086822127773067, "tokens_seen": 2394161152 }, { "epoch": 0.73, "learning_rate": 0.00027716257422564594, "loss": 0.0646, "theoretical_loss": 3.4086524361230475, "tokens_seen": 2394423296 }, { "epoch": 0.73, "learning_rate": 0.00027708233028406355, "loss": 0.0659, "theoretical_loss": 3.40862266364127, "tokens_seen": 2394685440 }, { "epoch": 0.73, "learning_rate": 0.00027700208634248117, "loss": 0.0643, "theoretical_loss": 3.4085928953309335, "tokens_seen": 2394947584 }, { "epoch": 0.73, "learning_rate": 0.0002769218424008987, "loss": 0.0676, "theoretical_loss": 3.408563131190996, "tokens_seen": 2395209728 }, { "epoch": 0.73, "learning_rate": 0.00027684159845931634, "loss": 0.0662, "theoretical_loss": 3.408533371220417, "tokens_seen": 2395471872 }, { "epoch": 0.73, "learning_rate": 0.00027676135451773395, "loss": 0.0663, "theoretical_loss": 3.408503615418156, "tokens_seen": 2395734016 }, { "epoch": 0.73, "learning_rate": 0.0002766811105761515, "loss": 0.0659, "theoretical_loss": 3.4084738637831746, "tokens_seen": 2395996160 }, { "epoch": 0.73, "learning_rate": 0.00027660086663456907, "loss": 0.0647, "theoretical_loss": 3.408444116314432, "tokens_seen": 2396258304 }, { "epoch": 0.73, "learning_rate": 0.0002765206226929867, "loss": 0.0677, "theoretical_loss": 3.4084143730108893, "tokens_seen": 2396520448 }, { "epoch": 0.73, "learning_rate": 0.00027644037875140424, "loss": 0.0672, "theoretical_loss": 3.408384633871509, "tokens_seen": 2396782592 }, { "epoch": 0.73, "learning_rate": 0.00027636013480982185, "loss": 0.0684, "theoretical_loss": 3.408354898895251, "tokens_seen": 2397044736 }, { "epoch": 0.73, "learning_rate": 0.00027627989086823946, "loss": 0.067, "theoretical_loss": 3.408325168081078, "tokens_seen": 2397306880 }, { "epoch": 0.73, "learning_rate": 0.0002761996469266571, "loss": 0.0665, "theoretical_loss": 3.4082954414279523, "tokens_seen": 2397569024 }, { "epoch": 0.73, "learning_rate": 0.00027611940298507464, "loss": 0.066, "theoretical_loss": 3.408265718934837, "tokens_seen": 2397831168 }, { "epoch": 0.73, "learning_rate": 0.00027603915904349225, "loss": 0.0686, "theoretical_loss": 3.4082360006006955, "tokens_seen": 2398093312 }, { "epoch": 0.73, "learning_rate": 0.0002759589151019098, "loss": 0.0662, "theoretical_loss": 3.4082062864244906, "tokens_seen": 2398355456 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.0008262444753199816, "objective/train/docs_used": 873551, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3028806447982788, "objective/train/original_loss": 1.3028804063796997, "objective/train/theoretical_loss": 3.4081765764051863, "objective/train/tokens_used": 2419077600, "objective/train/value_avg": -0.00951385498046875, "objective/train/value_loss": 0.0004103725077584386, "objective/train/value_max": -3.1948089599609375e-05, "objective/train/value_min": -0.8154296875, "objective/train/value_reward_corr": 0.7364599665479659, "objective/train/value_std": 0.0216217041015625, "objective/train/weight_avg": 1.0010087490081787, "objective/train/weighted_lm_loss": 1.302687644958496, "objective/train/weights_max": 1.6794044971466064, "objective/train/weights_min": 0.3682406544685364, "theoretical_loss": 3.4081765764051863, "tokens_seen": 2398617600 }, { "epoch": 0.73, "learning_rate": 0.00027587867116032736, "loss": 0.0653, "theoretical_loss": 3.4081765764051863, "tokens_seen": 2398617600 }, { "epoch": 0.73, "learning_rate": 0.000275798427218745, "loss": 0.0676, "theoretical_loss": 3.4081468705417475, "tokens_seen": 2398879744 }, { "epoch": 0.73, "learning_rate": 0.0002757181832771626, "loss": 0.0642, "theoretical_loss": 3.408117168833138, "tokens_seen": 2399141888 }, { "epoch": 0.73, "learning_rate": 0.0002756379393355802, "loss": 0.0649, "theoretical_loss": 3.408087471278323, "tokens_seen": 2399404032 }, { "epoch": 0.73, "learning_rate": 0.00027555769539399776, "loss": 0.0657, "theoretical_loss": 3.4080577778762686, "tokens_seen": 2399666176 }, { "epoch": 0.73, "learning_rate": 0.0002754774514524154, "loss": 0.0691, "theoretical_loss": 3.4080280886259393, "tokens_seen": 2399928320 }, { "epoch": 0.73, "learning_rate": 0.00027539720751083293, "loss": 0.0665, "theoretical_loss": 3.4079984035263022, "tokens_seen": 2400190464 }, { "epoch": 0.73, "learning_rate": 0.0002753169635692505, "loss": 0.0647, "theoretical_loss": 3.4079687225763236, "tokens_seen": 2400452608 }, { "epoch": 0.73, "learning_rate": 0.0002752367196276681, "loss": 0.0676, "theoretical_loss": 3.40793904577497, "tokens_seen": 2400714752 }, { "epoch": 0.73, "learning_rate": 0.0002751564756860857, "loss": 0.0688, "theoretical_loss": 3.4079093731212087, "tokens_seen": 2400976896 }, { "epoch": 0.73, "learning_rate": 0.00027507623174450333, "loss": 0.0648, "theoretical_loss": 3.4078797046140075, "tokens_seen": 2401239040 }, { "epoch": 0.73, "learning_rate": 0.0002749959878029209, "loss": 0.067, "theoretical_loss": 3.4078500402523346, "tokens_seen": 2401501184 }, { "epoch": 0.73, "learning_rate": 0.0002749157438613385, "loss": 0.0655, "theoretical_loss": 3.407820380035157, "tokens_seen": 2401763328 }, { "epoch": 0.73, "learning_rate": 0.00027483549991975606, "loss": 0.0669, "theoretical_loss": 3.4077907239614453, "tokens_seen": 2402025472 }, { "epoch": 0.73, "learning_rate": 0.0002747552559781736, "loss": 0.0681, "theoretical_loss": 3.4077610720301674, "tokens_seen": 2402287616 }, { "epoch": 0.73, "learning_rate": 0.00027467501203659123, "loss": 0.069, "theoretical_loss": 3.407731424240293, "tokens_seen": 2402549760 }, { "epoch": 0.73, "learning_rate": 0.00027459476809500884, "loss": 0.0658, "theoretical_loss": 3.4077017805907923, "tokens_seen": 2402811904 }, { "epoch": 0.73, "learning_rate": 0.0002745145241534264, "loss": 0.0707, "theoretical_loss": 3.4076721410806345, "tokens_seen": 2403074048 }, { "epoch": 0.73, "learning_rate": 0.000274434280211844, "loss": 0.0659, "theoretical_loss": 3.407642505708791, "tokens_seen": 2403336192 }, { "epoch": 0.73, "learning_rate": 0.0002743540362702616, "loss": 0.0674, "theoretical_loss": 3.4076128744742324, "tokens_seen": 2403598336 }, { "epoch": 0.73, "learning_rate": 0.00027427379232867924, "loss": 0.0664, "theoretical_loss": 3.4075832473759298, "tokens_seen": 2403860480 }, { "epoch": 0.73, "learning_rate": 0.00027419354838709674, "loss": 0.0715, "theoretical_loss": 3.4075536244128553, "tokens_seen": 2404122624 }, { "epoch": 0.73, "learning_rate": 0.00027411330444551435, "loss": 0.0702, "theoretical_loss": 3.4075240055839804, "tokens_seen": 2404384768 }, { "epoch": 0.73, "learning_rate": 0.00027403306050393197, "loss": 0.0667, "theoretical_loss": 3.407494390888278, "tokens_seen": 2404646912 }, { "epoch": 0.73, "learning_rate": 0.0002739528165623495, "loss": 0.068, "theoretical_loss": 3.4074647803247204, "tokens_seen": 2404909056 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.0003997816820628941, "objective/train/docs_used": 875512, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3201677799224854, "objective/train/original_loss": 1.3201675415039062, "objective/train/theoretical_loss": 3.407435173892281, "objective/train/tokens_used": 2425631200, "objective/train/value_avg": -0.006839752197265625, "objective/train/value_loss": 0.0002915321383625269, "objective/train/value_max": -2.485513687133789e-05, "objective/train/value_min": -0.943359375, "objective/train/value_reward_corr": 0.6612879561403876, "objective/train/value_std": 0.0157928466796875, "objective/train/weight_avg": 1.0005264282226562, "objective/train/weighted_lm_loss": 1.3203579187393188, "objective/train/weights_max": 1.6203583478927612, "objective/train/weights_min": 0.36834144592285156, "theoretical_loss": 3.407435173892281, "tokens_seen": 2405171200 }, { "epoch": 0.73, "learning_rate": 0.00027387257262076714, "loss": 0.064, "theoretical_loss": 3.407435173892281, "tokens_seen": 2405171200 }, { "epoch": 0.73, "learning_rate": 0.00027379232867918475, "loss": 0.0696, "theoretical_loss": 3.407405571589933, "tokens_seen": 2405433344 }, { "epoch": 0.73, "learning_rate": 0.00027371208473760236, "loss": 0.0686, "theoretical_loss": 3.4073759734166504, "tokens_seen": 2405695488 }, { "epoch": 0.73, "learning_rate": 0.00027363184079601987, "loss": 0.0668, "theoretical_loss": 3.4073463793714076, "tokens_seen": 2405957632 }, { "epoch": 0.73, "learning_rate": 0.0002735515968544375, "loss": 0.0697, "theoretical_loss": 3.407316789453179, "tokens_seen": 2406219776 }, { "epoch": 0.73, "learning_rate": 0.0002734713529128551, "loss": 0.0663, "theoretical_loss": 3.40728720366094, "tokens_seen": 2406481920 }, { "epoch": 0.73, "learning_rate": 0.00027339110897127265, "loss": 0.0688, "theoretical_loss": 3.407257621993665, "tokens_seen": 2406744064 }, { "epoch": 0.73, "learning_rate": 0.00027331086502969026, "loss": 0.0659, "theoretical_loss": 3.4072280444503305, "tokens_seen": 2407006208 }, { "epoch": 0.73, "learning_rate": 0.0002732306210881079, "loss": 0.0679, "theoretical_loss": 3.407198471029912, "tokens_seen": 2407268352 }, { "epoch": 0.73, "learning_rate": 0.0002731503771465255, "loss": 0.0677, "theoretical_loss": 3.4071689017313864, "tokens_seen": 2407530496 }, { "epoch": 0.73, "learning_rate": 0.000273070133204943, "loss": 0.0686, "theoretical_loss": 3.40713933655373, "tokens_seen": 2407792640 }, { "epoch": 0.73, "learning_rate": 0.0002729898892633606, "loss": 0.0713, "theoretical_loss": 3.40710977549592, "tokens_seen": 2408054784 }, { "epoch": 0.73, "learning_rate": 0.0002729096453217782, "loss": 0.068, "theoretical_loss": 3.4070802185569344, "tokens_seen": 2408316928 }, { "epoch": 0.73, "learning_rate": 0.0002728294013801958, "loss": 0.0686, "theoretical_loss": 3.407050665735751, "tokens_seen": 2408579072 }, { "epoch": 0.73, "learning_rate": 0.0002727491574386134, "loss": 0.0672, "theoretical_loss": 3.407021117031347, "tokens_seen": 2408841216 }, { "epoch": 0.73, "learning_rate": 0.000272668913497031, "loss": 0.066, "theoretical_loss": 3.4069915724427027, "tokens_seen": 2409103360 }, { "epoch": 0.73, "learning_rate": 0.0002725886695554486, "loss": 0.0662, "theoretical_loss": 3.4069620319687957, "tokens_seen": 2409365504 }, { "epoch": 0.73, "learning_rate": 0.00027250842561386617, "loss": 0.0693, "theoretical_loss": 3.406932495608606, "tokens_seen": 2409627648 }, { "epoch": 0.73, "learning_rate": 0.00027242818167228373, "loss": 0.0673, "theoretical_loss": 3.4069029633611128, "tokens_seen": 2409889792 }, { "epoch": 0.73, "learning_rate": 0.00027234793773070134, "loss": 0.0678, "theoretical_loss": 3.4068734352252967, "tokens_seen": 2410151936 }, { "epoch": 0.73, "learning_rate": 0.0002722676937891189, "loss": 0.0667, "theoretical_loss": 3.406843911200138, "tokens_seen": 2410414080 }, { "epoch": 0.73, "learning_rate": 0.0002721874498475365, "loss": 0.0686, "theoretical_loss": 3.406814391284617, "tokens_seen": 2410676224 }, { "epoch": 0.73, "learning_rate": 0.00027210720590595413, "loss": 0.0655, "theoretical_loss": 3.4067848754777152, "tokens_seen": 2410938368 }, { "epoch": 0.73, "learning_rate": 0.0002720269619643717, "loss": 0.0679, "theoretical_loss": 3.4067553637784145, "tokens_seen": 2411200512 }, { "epoch": 0.73, "learning_rate": 0.0002719467180227893, "loss": 0.0683, "theoretical_loss": 3.406725856185696, "tokens_seen": 2411462656 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.0002313482400495559, "objective/train/docs_used": 877919, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3729504346847534, "objective/train/original_loss": 1.3729503154754639, "objective/train/theoretical_loss": 3.4066963526985425, "objective/train/tokens_used": 2432184800, "objective/train/value_avg": -0.007843017578125, "objective/train/value_loss": 0.00015894180978648365, "objective/train/value_max": -3.9458274841308594e-05, "objective/train/value_min": -0.697265625, "objective/train/value_reward_corr": 0.7240007122567931, "objective/train/value_std": 0.01336669921875, "objective/train/weight_avg": 1.0003055334091187, "objective/train/weighted_lm_loss": 1.3730709552764893, "objective/train/weights_max": 1.3394194841384888, "objective/train/weights_min": 0.3690439462661743, "theoretical_loss": 3.4066963526985425, "tokens_seen": 2411724800 }, { "epoch": 0.73, "learning_rate": 0.00027186647408120686, "loss": 0.0671, "theoretical_loss": 3.4066963526985425, "tokens_seen": 2411724800 }, { "epoch": 0.73, "learning_rate": 0.00027178623013962447, "loss": 0.0675, "theoretical_loss": 3.4066668533159365, "tokens_seen": 2411986944 }, { "epoch": 0.73, "learning_rate": 0.00027170598619804203, "loss": 0.0689, "theoretical_loss": 3.4066373580368614, "tokens_seen": 2412249088 }, { "epoch": 0.73, "learning_rate": 0.00027162574225645964, "loss": 0.069, "theoretical_loss": 3.4066078668602997, "tokens_seen": 2412511232 }, { "epoch": 0.73, "learning_rate": 0.00027154549831487725, "loss": 0.0673, "theoretical_loss": 3.4065783797852354, "tokens_seen": 2412773376 }, { "epoch": 0.73, "learning_rate": 0.0002714652543732948, "loss": 0.0669, "theoretical_loss": 3.4065488968106528, "tokens_seen": 2413035520 }, { "epoch": 0.73, "learning_rate": 0.0002713850104317124, "loss": 0.0691, "theoretical_loss": 3.406519417935536, "tokens_seen": 2413297664 }, { "epoch": 0.73, "learning_rate": 0.00027130476649013004, "loss": 0.0686, "theoretical_loss": 3.4064899431588698, "tokens_seen": 2413559808 }, { "epoch": 0.73, "learning_rate": 0.0002712245225485476, "loss": 0.067, "theoretical_loss": 3.4064604724796395, "tokens_seen": 2413821952 }, { "epoch": 0.73, "learning_rate": 0.00027114427860696515, "loss": 0.0683, "theoretical_loss": 3.4064310058968306, "tokens_seen": 2414084096 }, { "epoch": 0.73, "learning_rate": 0.00027106403466538277, "loss": 0.0639, "theoretical_loss": 3.406401543409429, "tokens_seen": 2414346240 }, { "epoch": 0.73, "learning_rate": 0.0002709837907238004, "loss": 0.0689, "theoretical_loss": 3.4063720850164203, "tokens_seen": 2414608384 }, { "epoch": 0.73, "learning_rate": 0.00027090354678221794, "loss": 0.0674, "theoretical_loss": 3.406342630716792, "tokens_seen": 2414870528 }, { "epoch": 0.73, "learning_rate": 0.00027082330284063555, "loss": 0.068, "theoretical_loss": 3.406313180509531, "tokens_seen": 2415132672 }, { "epoch": 0.73, "learning_rate": 0.00027074305889905316, "loss": 0.0677, "theoretical_loss": 3.406283734393624, "tokens_seen": 2415394816 }, { "epoch": 0.73, "learning_rate": 0.00027066281495747067, "loss": 0.066, "theoretical_loss": 3.4062542923680588, "tokens_seen": 2415656960 }, { "epoch": 0.73, "learning_rate": 0.0002705825710158883, "loss": 0.0681, "theoretical_loss": 3.406224854431824, "tokens_seen": 2415919104 }, { "epoch": 0.73, "learning_rate": 0.0002705023270743059, "loss": 0.0684, "theoretical_loss": 3.406195420583907, "tokens_seen": 2416181248 }, { "epoch": 0.73, "learning_rate": 0.0002704220831327235, "loss": 0.0662, "theoretical_loss": 3.4061659908232973, "tokens_seen": 2416443392 }, { "epoch": 0.73, "learning_rate": 0.00027034183919114106, "loss": 0.0668, "theoretical_loss": 3.4061365651489837, "tokens_seen": 2416705536 }, { "epoch": 0.73, "learning_rate": 0.0002702615952495587, "loss": 0.0686, "theoretical_loss": 3.406107143559956, "tokens_seen": 2416967680 }, { "epoch": 0.73, "learning_rate": 0.0002701813513079763, "loss": 0.0678, "theoretical_loss": 3.4060777260552038, "tokens_seen": 2417229824 }, { "epoch": 0.73, "learning_rate": 0.0002701011073663938, "loss": 0.0685, "theoretical_loss": 3.4060483126337173, "tokens_seen": 2417491968 }, { "epoch": 0.73, "learning_rate": 0.0002700208634248114, "loss": 0.0695, "theoretical_loss": 3.4060189032944868, "tokens_seen": 2417754112 }, { "epoch": 0.73, "learning_rate": 0.000269940619483229, "loss": 0.066, "theoretical_loss": 3.4059894980365035, "tokens_seen": 2418016256 }, { "epoch": 0.73, "objective/train/advantage_avg": -0.0001355194835923612, "objective/train/docs_used": 880852, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3654475212097168, "objective/train/original_loss": 1.3654474020004272, "objective/train/theoretical_loss": 3.4059600968587587, "objective/train/tokens_used": 2438738400, "objective/train/value_avg": -0.007030487060546875, "objective/train/value_loss": 0.0004641265550162643, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.6494140625, "objective/train/value_reward_corr": 0.642583948534395, "objective/train/value_std": 0.01666259765625, "objective/train/weight_avg": 1.0000636577606201, "objective/train/weighted_lm_loss": 1.3649080991744995, "objective/train/weights_max": 1.517284870147705, "objective/train/weights_min": 0.36884480714797974, "theoretical_loss": 3.4059600968587587, "tokens_seen": 2418278400 }, { "epoch": 0.73, "learning_rate": 0.00026986037554164663, "loss": 0.0675, "theoretical_loss": 3.4059600968587587, "tokens_seen": 2418278400 }, { "epoch": 0.73, "learning_rate": 0.0002697801316000642, "loss": 0.0664, "theoretical_loss": 3.4059306997602437, "tokens_seen": 2418540544 }, { "epoch": 0.73, "learning_rate": 0.0002696998876584818, "loss": 0.066, "theoretical_loss": 3.405901306739951, "tokens_seen": 2418802688 }, { "epoch": 0.73, "learning_rate": 0.0002696196437168994, "loss": 0.0659, "theoretical_loss": 3.405871917796872, "tokens_seen": 2419064832 }, { "epoch": 0.73, "learning_rate": 0.00026953939977531697, "loss": 0.0697, "theoretical_loss": 3.405842532930001, "tokens_seen": 2419326976 }, { "epoch": 0.73, "learning_rate": 0.00026945915583373453, "loss": 0.0653, "theoretical_loss": 3.4058131521383292, "tokens_seen": 2419589120 }, { "epoch": 0.73, "learning_rate": 0.00026937891189215214, "loss": 0.0652, "theoretical_loss": 3.4057837754208515, "tokens_seen": 2419851264 }, { "epoch": 0.73, "learning_rate": 0.00026929866795056976, "loss": 0.0677, "theoretical_loss": 3.4057544027765605, "tokens_seen": 2420113408 }, { "epoch": 0.73, "learning_rate": 0.0002692184240089873, "loss": 0.07, "theoretical_loss": 3.4057250342044516, "tokens_seen": 2420375552 }, { "epoch": 0.73, "learning_rate": 0.0002691381800674049, "loss": 0.0652, "theoretical_loss": 3.405695669703518, "tokens_seen": 2420637696 }, { "epoch": 0.73, "learning_rate": 0.00026905793612582254, "loss": 0.0662, "theoretical_loss": 3.405666309272755, "tokens_seen": 2420899840 }, { "epoch": 0.73, "learning_rate": 0.0002689776921842401, "loss": 0.0682, "theoretical_loss": 3.4056369529111583, "tokens_seen": 2421161984 }, { "epoch": 0.73, "learning_rate": 0.00026889744824265766, "loss": 0.0679, "theoretical_loss": 3.4056076006177234, "tokens_seen": 2421424128 }, { "epoch": 0.73, "learning_rate": 0.00026881720430107527, "loss": 0.0661, "theoretical_loss": 3.4055782523914453, "tokens_seen": 2421686272 }, { "epoch": 0.73, "learning_rate": 0.0002687369603594929, "loss": 0.0678, "theoretical_loss": 3.405548908231321, "tokens_seen": 2421948416 }, { "epoch": 0.73, "learning_rate": 0.00026865671641791044, "loss": 0.0665, "theoretical_loss": 3.405519568136347, "tokens_seen": 2422210560 }, { "epoch": 0.73, "learning_rate": 0.00026857647247632805, "loss": 0.0715, "theoretical_loss": 3.40549023210552, "tokens_seen": 2422472704 }, { "epoch": 0.73, "learning_rate": 0.00026849622853474566, "loss": 0.0704, "theoretical_loss": 3.4054609001378378, "tokens_seen": 2422734848 }, { "epoch": 0.73, "learning_rate": 0.0002684159845931632, "loss": 0.0683, "theoretical_loss": 3.4054315722322976, "tokens_seen": 2422996992 }, { "epoch": 0.73, "learning_rate": 0.0002683357406515808, "loss": 0.0664, "theoretical_loss": 3.4054022483878983, "tokens_seen": 2423259136 }, { "epoch": 0.73, "learning_rate": 0.0002682554967099984, "loss": 0.0682, "theoretical_loss": 3.4053729286036374, "tokens_seen": 2423521280 }, { "epoch": 0.73, "learning_rate": 0.00026817525276841595, "loss": 0.0662, "theoretical_loss": 3.405343612878514, "tokens_seen": 2423783424 }, { "epoch": 0.73, "learning_rate": 0.00026809500882683357, "loss": 0.0651, "theoretical_loss": 3.405314301211527, "tokens_seen": 2424045568 }, { "epoch": 0.73, "learning_rate": 0.0002680147648852512, "loss": 0.0653, "theoretical_loss": 3.4052849936016765, "tokens_seen": 2424307712 }, { "epoch": 0.73, "learning_rate": 0.0002679345209436688, "loss": 0.0671, "theoretical_loss": 3.4052556900479614, "tokens_seen": 2424569856 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.0004885383532382548, "objective/train/docs_used": 882990, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3813307285308838, "objective/train/original_loss": 1.3813307285308838, "objective/train/theoretical_loss": 3.405226390549383, "objective/train/tokens_used": 2445292000, "objective/train/value_avg": -0.00766754150390625, "objective/train/value_loss": 0.00018243658996652812, "objective/train/value_max": -4.2319297790527344e-05, "objective/train/value_min": -0.306640625, "objective/train/value_reward_corr": 0.6878939764227319, "objective/train/value_std": 0.012908935546875, "objective/train/weight_avg": 1.0005711317062378, "objective/train/weighted_lm_loss": 1.381544828414917, "objective/train/weights_max": 1.1512739658355713, "objective/train/weights_min": 0.3689032196998596, "theoretical_loss": 3.405226390549383, "tokens_seen": 2424832000 }, { "epoch": 0.73, "learning_rate": 0.00026785427700208635, "loss": 0.0676, "theoretical_loss": 3.405226390549383, "tokens_seen": 2424832000 }, { "epoch": 0.73, "learning_rate": 0.00026777403306050396, "loss": 0.0685, "theoretical_loss": 3.4051970951049406, "tokens_seen": 2425094144 }, { "epoch": 0.73, "learning_rate": 0.0002676937891189215, "loss": 0.0694, "theoretical_loss": 3.4051678037136357, "tokens_seen": 2425356288 }, { "epoch": 0.74, "learning_rate": 0.0002676135451773391, "loss": 0.0675, "theoretical_loss": 3.40513851637447, "tokens_seen": 2425618432 }, { "epoch": 0.74, "learning_rate": 0.0002675333012357567, "loss": 0.0679, "theoretical_loss": 3.4051092330864443, "tokens_seen": 2425880576 }, { "epoch": 0.74, "learning_rate": 0.0002674530572941743, "loss": 0.0674, "theoretical_loss": 3.405079953848561, "tokens_seen": 2426142720 }, { "epoch": 0.74, "learning_rate": 0.0002673728133525919, "loss": 0.068, "theoretical_loss": 3.4050506786598222, "tokens_seen": 2426404864 }, { "epoch": 0.74, "learning_rate": 0.0002672925694110095, "loss": 0.0687, "theoretical_loss": 3.405021407519231, "tokens_seen": 2426667008 }, { "epoch": 0.74, "learning_rate": 0.0002672123254694271, "loss": 0.068, "theoretical_loss": 3.4049921404257897, "tokens_seen": 2426929152 }, { "epoch": 0.74, "learning_rate": 0.00026713208152784465, "loss": 0.0652, "theoretical_loss": 3.4049628773785017, "tokens_seen": 2427191296 }, { "epoch": 0.74, "learning_rate": 0.0002670518375862622, "loss": 0.0649, "theoretical_loss": 3.4049336183763717, "tokens_seen": 2427453440 }, { "epoch": 0.74, "learning_rate": 0.0002669715936446798, "loss": 0.0677, "theoretical_loss": 3.4049043634184026, "tokens_seen": 2427715584 }, { "epoch": 0.74, "learning_rate": 0.00026689134970309743, "loss": 0.0681, "theoretical_loss": 3.4048751125035994, "tokens_seen": 2427977728 }, { "epoch": 0.74, "learning_rate": 0.00026681110576151504, "loss": 0.067, "theoretical_loss": 3.404845865630967, "tokens_seen": 2428239872 }, { "epoch": 0.74, "learning_rate": 0.0002667308618199326, "loss": 0.0675, "theoretical_loss": 3.40481662279951, "tokens_seen": 2428502016 }, { "epoch": 0.74, "learning_rate": 0.0002666506178783502, "loss": 0.0671, "theoretical_loss": 3.4047873840082348, "tokens_seen": 2428764160 }, { "epoch": 0.74, "learning_rate": 0.0002665703739367678, "loss": 0.0673, "theoretical_loss": 3.404758149256146, "tokens_seen": 2429026304 }, { "epoch": 0.74, "learning_rate": 0.00026649012999518533, "loss": 0.0671, "theoretical_loss": 3.4047289185422507, "tokens_seen": 2429288448 }, { "epoch": 0.74, "learning_rate": 0.00026640988605360294, "loss": 0.0653, "theoretical_loss": 3.404699691865555, "tokens_seen": 2429550592 }, { "epoch": 0.74, "learning_rate": 0.00026632964211202055, "loss": 0.0681, "theoretical_loss": 3.4046704692250658, "tokens_seen": 2429812736 }, { "epoch": 0.74, "learning_rate": 0.0002662493981704381, "loss": 0.0663, "theoretical_loss": 3.404641250619791, "tokens_seen": 2430074880 }, { "epoch": 0.74, "learning_rate": 0.0002661691542288557, "loss": 0.0694, "theoretical_loss": 3.404612036048737, "tokens_seen": 2430337024 }, { "epoch": 0.74, "learning_rate": 0.00026608891028727334, "loss": 0.0684, "theoretical_loss": 3.404582825510913, "tokens_seen": 2430599168 }, { "epoch": 0.74, "learning_rate": 0.00026600866634569095, "loss": 0.0668, "theoretical_loss": 3.404553619005326, "tokens_seen": 2430861312 }, { "epoch": 0.74, "learning_rate": 0.00026592842240410846, "loss": 0.0673, "theoretical_loss": 3.404524416530986, "tokens_seen": 2431123456 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.0006771566113457084, "objective/train/docs_used": 885315, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3741639852523804, "objective/train/original_loss": 1.3741638660430908, "objective/train/theoretical_loss": 3.4044952180869013, "objective/train/tokens_used": 2451845600, "objective/train/value_avg": -0.006244659423828125, "objective/train/value_loss": 0.00024454991216771305, "objective/train/value_max": -2.6702880859375e-05, "objective/train/value_min": -0.79736328125, "objective/train/value_reward_corr": 0.587969724531287, "objective/train/value_std": 0.011932373046875, "objective/train/weight_avg": 1.0007866621017456, "objective/train/weighted_lm_loss": 1.3749604225158691, "objective/train/weights_max": 2.1188037395477295, "objective/train/weights_min": 0.369240403175354, "theoretical_loss": 3.4044952180869013, "tokens_seen": 2431385600 }, { "epoch": 0.74, "learning_rate": 0.00026584817846252607, "loss": 0.0673, "theoretical_loss": 3.4044952180869013, "tokens_seen": 2431385600 }, { "epoch": 0.74, "learning_rate": 0.0002657679345209437, "loss": 0.0689, "theoretical_loss": 3.4044660236720805, "tokens_seen": 2431647744 }, { "epoch": 0.74, "learning_rate": 0.00026568769057936124, "loss": 0.0655, "theoretical_loss": 3.404436833285535, "tokens_seen": 2431909888 }, { "epoch": 0.74, "learning_rate": 0.00026560744663777885, "loss": 0.0671, "theoretical_loss": 3.4044076469262734, "tokens_seen": 2432172032 }, { "epoch": 0.74, "learning_rate": 0.00026552720269619646, "loss": 0.0653, "theoretical_loss": 3.4043784645933064, "tokens_seen": 2432434176 }, { "epoch": 0.74, "learning_rate": 0.0002654469587546141, "loss": 0.0671, "theoretical_loss": 3.4043492862856457, "tokens_seen": 2432696320 }, { "epoch": 0.74, "learning_rate": 0.0002653667148130316, "loss": 0.0684, "theoretical_loss": 3.404320112002301, "tokens_seen": 2432958464 }, { "epoch": 0.74, "learning_rate": 0.0002652864708714492, "loss": 0.0676, "theoretical_loss": 3.4042909417422846, "tokens_seen": 2433220608 }, { "epoch": 0.74, "learning_rate": 0.0002652062269298668, "loss": 0.0679, "theoretical_loss": 3.404261775504608, "tokens_seen": 2433482752 }, { "epoch": 0.74, "learning_rate": 0.00026512598298828436, "loss": 0.0658, "theoretical_loss": 3.404232613288283, "tokens_seen": 2433744896 }, { "epoch": 0.74, "learning_rate": 0.000265045739046702, "loss": 0.0676, "theoretical_loss": 3.404203455092323, "tokens_seen": 2434007040 }, { "epoch": 0.74, "learning_rate": 0.0002649654951051196, "loss": 0.0652, "theoretical_loss": 3.40417430091574, "tokens_seen": 2434269184 }, { "epoch": 0.74, "learning_rate": 0.0002648852511635372, "loss": 0.0675, "theoretical_loss": 3.404145150757548, "tokens_seen": 2434531328 }, { "epoch": 0.74, "learning_rate": 0.00026480500722195476, "loss": 0.0684, "theoretical_loss": 3.4041160046167596, "tokens_seen": 2434793472 }, { "epoch": 0.74, "learning_rate": 0.0002647247632803723, "loss": 0.0664, "theoretical_loss": 3.4040868624923895, "tokens_seen": 2435055616 }, { "epoch": 0.74, "learning_rate": 0.00026464451933878993, "loss": 0.0687, "theoretical_loss": 3.4040577243834513, "tokens_seen": 2435317760 }, { "epoch": 0.74, "learning_rate": 0.0002645642753972075, "loss": 0.0649, "theoretical_loss": 3.4040285902889598, "tokens_seen": 2435579904 }, { "epoch": 0.74, "learning_rate": 0.0002644840314556251, "loss": 0.0682, "theoretical_loss": 3.40399946020793, "tokens_seen": 2435842048 }, { "epoch": 0.74, "learning_rate": 0.0002644037875140427, "loss": 0.0664, "theoretical_loss": 3.403970334139377, "tokens_seen": 2436104192 }, { "epoch": 0.74, "learning_rate": 0.00026432354357246033, "loss": 0.0655, "theoretical_loss": 3.403941212082317, "tokens_seen": 2436366336 }, { "epoch": 0.74, "learning_rate": 0.0002642432996308779, "loss": 0.0677, "theoretical_loss": 3.4039120940357654, "tokens_seen": 2436628480 }, { "epoch": 0.74, "learning_rate": 0.00026416305568929544, "loss": 0.0664, "theoretical_loss": 3.4038829799987385, "tokens_seen": 2436890624 }, { "epoch": 0.74, "learning_rate": 0.00026408281174771306, "loss": 0.0673, "theoretical_loss": 3.4038538699702534, "tokens_seen": 2437152768 }, { "epoch": 0.74, "learning_rate": 0.0002640025678061306, "loss": 0.0665, "theoretical_loss": 3.403824763949326, "tokens_seen": 2437414912 }, { "epoch": 0.74, "learning_rate": 0.00026392232386454823, "loss": 0.0673, "theoretical_loss": 3.403795661934976, "tokens_seen": 2437677056 }, { "epoch": 0.74, "objective/train/advantage_avg": 3.9483591535827145e-05, "objective/train/docs_used": 887686, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2737083435058594, "objective/train/original_loss": 1.2737083435058594, "objective/train/theoretical_loss": 3.4037665639262182, "objective/train/tokens_used": 2458399200, "objective/train/value_avg": -0.00609588623046875, "objective/train/value_loss": 0.00029432339943014085, "objective/train/value_max": -4.00543212890625e-05, "objective/train/value_min": -0.70263671875, "objective/train/value_reward_corr": 0.6688808394913893, "objective/train/value_std": 0.0136260986328125, "objective/train/weight_avg": 1.0001697540283203, "objective/train/weighted_lm_loss": 1.2733510732650757, "objective/train/weights_max": 1.6455042362213135, "objective/train/weights_min": 0.3695588707923889, "theoretical_loss": 3.4037665639262182, "tokens_seen": 2437939200 }, { "epoch": 0.74, "learning_rate": 0.00026384207992296584, "loss": 0.0643, "theoretical_loss": 3.4037665639262182, "tokens_seen": 2437939200 }, { "epoch": 0.74, "learning_rate": 0.0002637618359813834, "loss": 0.0655, "theoretical_loss": 3.403737469922073, "tokens_seen": 2438201344 }, { "epoch": 0.74, "learning_rate": 0.000263681592039801, "loss": 0.0675, "theoretical_loss": 3.4037083799215573, "tokens_seen": 2438463488 }, { "epoch": 0.74, "learning_rate": 0.00026360134809821857, "loss": 0.0648, "theoretical_loss": 3.40367929392369, "tokens_seen": 2438725632 }, { "epoch": 0.74, "learning_rate": 0.0002635211041566362, "loss": 0.0661, "theoretical_loss": 3.4036502119274914, "tokens_seen": 2438987776 }, { "epoch": 0.74, "learning_rate": 0.00026344086021505374, "loss": 0.0633, "theoretical_loss": 3.4036211339319795, "tokens_seen": 2439249920 }, { "epoch": 0.74, "learning_rate": 0.00026336061627347135, "loss": 0.0658, "theoretical_loss": 3.4035920599361753, "tokens_seen": 2439512064 }, { "epoch": 0.74, "learning_rate": 0.00026328037233188897, "loss": 0.0686, "theoretical_loss": 3.403562989939098, "tokens_seen": 2439774208 }, { "epoch": 0.74, "learning_rate": 0.0002632001283903065, "loss": 0.0679, "theoretical_loss": 3.4035339239397686, "tokens_seen": 2440036352 }, { "epoch": 0.74, "learning_rate": 0.00026311988444872414, "loss": 0.0688, "theoretical_loss": 3.4035048619372072, "tokens_seen": 2440298496 }, { "epoch": 0.74, "learning_rate": 0.00026303964050714175, "loss": 0.0663, "theoretical_loss": 3.403475803930436, "tokens_seen": 2440560640 }, { "epoch": 0.74, "learning_rate": 0.0002629593965655593, "loss": 0.0663, "theoretical_loss": 3.4034467499184755, "tokens_seen": 2440822784 }, { "epoch": 0.74, "learning_rate": 0.00026287915262397687, "loss": 0.0662, "theoretical_loss": 3.4034176999003485, "tokens_seen": 2441084928 }, { "epoch": 0.74, "learning_rate": 0.0002627989086823945, "loss": 0.0648, "theoretical_loss": 3.4033886538750764, "tokens_seen": 2441347072 }, { "epoch": 0.74, "learning_rate": 0.0002627186647408121, "loss": 0.0668, "theoretical_loss": 3.4033596118416822, "tokens_seen": 2441609216 }, { "epoch": 0.74, "learning_rate": 0.00026263842079922965, "loss": 0.0632, "theoretical_loss": 3.4033305737991886, "tokens_seen": 2441871360 }, { "epoch": 0.74, "learning_rate": 0.00026255817685764726, "loss": 0.0681, "theoretical_loss": 3.403301539746619, "tokens_seen": 2442133504 }, { "epoch": 0.74, "learning_rate": 0.0002624779329160649, "loss": 0.0665, "theoretical_loss": 3.4032725096829965, "tokens_seen": 2442395648 }, { "epoch": 0.74, "learning_rate": 0.0002623976889744824, "loss": 0.0669, "theoretical_loss": 3.4032434836073455, "tokens_seen": 2442657792 }, { "epoch": 0.74, "learning_rate": 0.0002623174450329, "loss": 0.0688, "theoretical_loss": 3.40321446151869, "tokens_seen": 2442919936 }, { "epoch": 0.74, "learning_rate": 0.0002622372010913176, "loss": 0.0679, "theoretical_loss": 3.4031854434160556, "tokens_seen": 2443182080 }, { "epoch": 0.74, "learning_rate": 0.0002621569571497352, "loss": 0.0682, "theoretical_loss": 3.4031564292984653, "tokens_seen": 2443444224 }, { "epoch": 0.74, "learning_rate": 0.0002620767132081528, "loss": 0.0665, "theoretical_loss": 3.403127419164946, "tokens_seen": 2443706368 }, { "epoch": 0.74, "learning_rate": 0.0002619964692665704, "loss": 0.0681, "theoretical_loss": 3.4030984130145225, "tokens_seen": 2443968512 }, { "epoch": 0.74, "learning_rate": 0.000261916225324988, "loss": 0.069, "theoretical_loss": 3.4030694108462214, "tokens_seen": 2444230656 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.0003610901185311377, "objective/train/docs_used": 890203, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4358042478561401, "objective/train/original_loss": 1.4358041286468506, "objective/train/theoretical_loss": 3.4030404126590685, "objective/train/tokens_used": 2464952800, "objective/train/value_avg": -0.009307861328125, "objective/train/value_loss": 0.00024326522543560714, "objective/train/value_max": -2.9981136322021484e-05, "objective/train/value_min": -0.69384765625, "objective/train/value_reward_corr": 0.7629903910047869, "objective/train/value_std": 0.0187835693359375, "objective/train/weight_avg": 1.000473976135254, "objective/train/weighted_lm_loss": 1.4363505840301514, "objective/train/weights_max": 1.3608444929122925, "objective/train/weights_min": 0.37018251419067383, "theoretical_loss": 3.4030404126590685, "tokens_seen": 2444492800 }, { "epoch": 0.74, "learning_rate": 0.00026183598138340556, "loss": 0.0687, "theoretical_loss": 3.4030404126590685, "tokens_seen": 2444492800 }, { "epoch": 0.74, "learning_rate": 0.0002617557374418231, "loss": 0.0686, "theoretical_loss": 3.403011418452091, "tokens_seen": 2444754944 }, { "epoch": 0.74, "learning_rate": 0.00026167549350024073, "loss": 0.0681, "theoretical_loss": 3.402982428224315, "tokens_seen": 2445017088 }, { "epoch": 0.74, "learning_rate": 0.00026159524955865834, "loss": 0.0688, "theoretical_loss": 3.4029534419747685, "tokens_seen": 2445279232 }, { "epoch": 0.74, "learning_rate": 0.0002615150056170759, "loss": 0.0678, "theoretical_loss": 3.4029244597024793, "tokens_seen": 2445541376 }, { "epoch": 0.74, "learning_rate": 0.0002614347616754935, "loss": 0.0658, "theoretical_loss": 3.402895481406475, "tokens_seen": 2445803520 }, { "epoch": 0.74, "learning_rate": 0.00026135451773391113, "loss": 0.0651, "theoretical_loss": 3.4028665070857844, "tokens_seen": 2446065664 }, { "epoch": 0.74, "learning_rate": 0.0002612742737923287, "loss": 0.0672, "theoretical_loss": 3.402837536739436, "tokens_seen": 2446327808 }, { "epoch": 0.74, "learning_rate": 0.00026119402985074624, "loss": 0.067, "theoretical_loss": 3.4028085703664583, "tokens_seen": 2446589952 }, { "epoch": 0.74, "learning_rate": 0.00026111378590916386, "loss": 0.0671, "theoretical_loss": 3.402779607965882, "tokens_seen": 2446852096 }, { "epoch": 0.74, "learning_rate": 0.00026103354196758147, "loss": 0.0684, "theoretical_loss": 3.4027506495367357, "tokens_seen": 2447114240 }, { "epoch": 0.74, "learning_rate": 0.00026095329802599903, "loss": 0.0665, "theoretical_loss": 3.4027216950780494, "tokens_seen": 2447376384 }, { "epoch": 0.74, "learning_rate": 0.00026087305408441664, "loss": 0.0653, "theoretical_loss": 3.4026927445888546, "tokens_seen": 2447638528 }, { "epoch": 0.74, "learning_rate": 0.00026079281014283425, "loss": 0.0609, "theoretical_loss": 3.4026637980681813, "tokens_seen": 2447900672 }, { "epoch": 0.74, "learning_rate": 0.0002607125662012518, "loss": 0.0663, "theoretical_loss": 3.4026348555150605, "tokens_seen": 2448162816 }, { "epoch": 0.74, "learning_rate": 0.00026063232225966937, "loss": 0.0668, "theoretical_loss": 3.402605916928524, "tokens_seen": 2448424960 }, { "epoch": 0.74, "learning_rate": 0.000260552078318087, "loss": 0.0658, "theoretical_loss": 3.4025769823076035, "tokens_seen": 2448687104 }, { "epoch": 0.74, "learning_rate": 0.0002604718343765046, "loss": 0.067, "theoretical_loss": 3.402548051651331, "tokens_seen": 2448949248 }, { "epoch": 0.74, "learning_rate": 0.00026039159043492215, "loss": 0.0689, "theoretical_loss": 3.402519124958739, "tokens_seen": 2449211392 }, { "epoch": 0.74, "learning_rate": 0.00026031134649333977, "loss": 0.0644, "theoretical_loss": 3.4024902022288606, "tokens_seen": 2449473536 }, { "epoch": 0.74, "learning_rate": 0.0002602311025517574, "loss": 0.0646, "theoretical_loss": 3.4024612834607284, "tokens_seen": 2449735680 }, { "epoch": 0.74, "learning_rate": 0.00026015085861017494, "loss": 0.0619, "theoretical_loss": 3.402432368653376, "tokens_seen": 2449997824 }, { "epoch": 0.74, "learning_rate": 0.00026007061466859255, "loss": 0.0668, "theoretical_loss": 3.4024034578058373, "tokens_seen": 2450259968 }, { "epoch": 0.74, "learning_rate": 0.0002599903707270101, "loss": 0.0646, "theoretical_loss": 3.4023745509171466, "tokens_seen": 2450522112 }, { "epoch": 0.74, "learning_rate": 0.00025991012678542767, "loss": 0.064, "theoretical_loss": 3.4023456479863383, "tokens_seen": 2450784256 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.0005494300276041031, "objective/train/docs_used": 891914, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3421109914779663, "objective/train/original_loss": 1.3421109914779663, "objective/train/theoretical_loss": 3.402316749012447, "objective/train/tokens_used": 2471506400, "objective/train/value_avg": -0.006969451904296875, "objective/train/value_loss": 0.0004489516431931406, "objective/train/value_max": -2.0623207092285156e-05, "objective/train/value_min": -0.72607421875, "objective/train/value_reward_corr": 0.6451283943786772, "objective/train/value_std": 0.0165252685546875, "objective/train/weight_avg": 1.0007450580596924, "objective/train/weighted_lm_loss": 1.3425601720809937, "objective/train/weights_max": 1.8591458797454834, "objective/train/weights_min": 0.37829095125198364, "theoretical_loss": 3.402316749012447, "tokens_seen": 2451046400 }, { "epoch": 0.74, "learning_rate": 0.0002598298828438453, "loss": 0.0648, "theoretical_loss": 3.402316749012447, "tokens_seen": 2451046400 }, { "epoch": 0.74, "learning_rate": 0.0002597496389022629, "loss": 0.0623, "theoretical_loss": 3.4022878539945087, "tokens_seen": 2451308544 }, { "epoch": 0.74, "learning_rate": 0.0002596693949606805, "loss": 0.0663, "theoretical_loss": 3.402258962931558, "tokens_seen": 2451570688 }, { "epoch": 0.74, "learning_rate": 0.00025958915101909806, "loss": 0.0645, "theoretical_loss": 3.4022300758226307, "tokens_seen": 2451832832 }, { "epoch": 0.74, "learning_rate": 0.0002595089070775157, "loss": 0.0657, "theoretical_loss": 3.4022011926667637, "tokens_seen": 2452094976 }, { "epoch": 0.74, "learning_rate": 0.00025942866313593323, "loss": 0.0636, "theoretical_loss": 3.402172313462993, "tokens_seen": 2452357120 }, { "epoch": 0.74, "learning_rate": 0.0002593484191943508, "loss": 0.0651, "theoretical_loss": 3.4021434382103557, "tokens_seen": 2452619264 }, { "epoch": 0.74, "learning_rate": 0.0002592681752527684, "loss": 0.0642, "theoretical_loss": 3.4021145669078887, "tokens_seen": 2452881408 }, { "epoch": 0.74, "learning_rate": 0.000259187931311186, "loss": 0.0652, "theoretical_loss": 3.40208569955463, "tokens_seen": 2453143552 }, { "epoch": 0.74, "learning_rate": 0.00025910768736960363, "loss": 0.0652, "theoretical_loss": 3.402056836149617, "tokens_seen": 2453405696 }, { "epoch": 0.74, "learning_rate": 0.0002590274434280212, "loss": 0.0635, "theoretical_loss": 3.4020279766918886, "tokens_seen": 2453667840 }, { "epoch": 0.74, "learning_rate": 0.0002589471994864388, "loss": 0.0663, "theoretical_loss": 3.401999121180482, "tokens_seen": 2453929984 }, { "epoch": 0.74, "learning_rate": 0.00025886695554485636, "loss": 0.0604, "theoretical_loss": 3.401970269614438, "tokens_seen": 2454192128 }, { "epoch": 0.74, "learning_rate": 0.0002587867116032739, "loss": 0.065, "theoretical_loss": 3.4019414219927944, "tokens_seen": 2454454272 }, { "epoch": 0.74, "learning_rate": 0.00025870646766169153, "loss": 0.0622, "theoretical_loss": 3.4019125783145907, "tokens_seen": 2454716416 }, { "epoch": 0.74, "learning_rate": 0.00025862622372010914, "loss": 0.0629, "theoretical_loss": 3.4018837385788676, "tokens_seen": 2454978560 }, { "epoch": 0.74, "learning_rate": 0.00025854597977852676, "loss": 0.0666, "theoretical_loss": 3.401854902784665, "tokens_seen": 2455240704 }, { "epoch": 0.74, "learning_rate": 0.0002584657358369443, "loss": 0.0653, "theoretical_loss": 3.4018260709310235, "tokens_seen": 2455502848 }, { "epoch": 0.74, "learning_rate": 0.0002583854918953619, "loss": 0.0634, "theoretical_loss": 3.4017972430169836, "tokens_seen": 2455764992 }, { "epoch": 0.74, "learning_rate": 0.00025830524795377954, "loss": 0.0658, "theoretical_loss": 3.4017684190415873, "tokens_seen": 2456027136 }, { "epoch": 0.74, "learning_rate": 0.00025822500401219704, "loss": 0.0642, "theoretical_loss": 3.4017395990038755, "tokens_seen": 2456289280 }, { "epoch": 0.74, "learning_rate": 0.00025814476007061466, "loss": 0.0656, "theoretical_loss": 3.4017107829028905, "tokens_seen": 2456551424 }, { "epoch": 0.74, "learning_rate": 0.00025806451612903227, "loss": 0.0632, "theoretical_loss": 3.401681970737674, "tokens_seen": 2456813568 }, { "epoch": 0.74, "learning_rate": 0.0002579842721874498, "loss": 0.0653, "theoretical_loss": 3.4016531625072695, "tokens_seen": 2457075712 }, { "epoch": 0.74, "learning_rate": 0.00025790402824586744, "loss": 0.0672, "theoretical_loss": 3.4016243582107193, "tokens_seen": 2457337856 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.0005634583649225533, "objective/train/docs_used": 894282, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2582533359527588, "objective/train/original_loss": 1.2582532167434692, "objective/train/theoretical_loss": 3.4015955578470667, "objective/train/tokens_used": 2478060000, "objective/train/value_avg": -0.005840301513671875, "objective/train/value_loss": 0.00014222218305803835, "objective/train/value_max": -2.4497509002685547e-05, "objective/train/value_min": -0.39453125, "objective/train/value_reward_corr": 0.6571031252196362, "objective/train/value_std": 0.010833740234375, "objective/train/weight_avg": 1.0006279945373535, "objective/train/weighted_lm_loss": 1.2587014436721802, "objective/train/weights_max": 1.2988438606262207, "objective/train/weights_min": 0.3701740503311157, "theoretical_loss": 3.4015955578470667, "tokens_seen": 2457600000 }, { "epoch": 0.74, "learning_rate": 0.00025782378430428505, "loss": 0.063, "theoretical_loss": 3.4015955578470667, "tokens_seen": 2457600000 }, { "epoch": 0.74, "learning_rate": 0.00025774354036270266, "loss": 0.0644, "theoretical_loss": 3.4015667614153555, "tokens_seen": 2457862144 }, { "epoch": 0.74, "learning_rate": 0.00025766329642112017, "loss": 0.0632, "theoretical_loss": 3.4015379689146292, "tokens_seen": 2458124288 }, { "epoch": 0.74, "learning_rate": 0.0002575830524795378, "loss": 0.0662, "theoretical_loss": 3.4015091803439326, "tokens_seen": 2458386432 }, { "epoch": 0.75, "learning_rate": 0.0002575028085379554, "loss": 0.067, "theoretical_loss": 3.4014803957023094, "tokens_seen": 2458648576 }, { "epoch": 0.75, "learning_rate": 0.00025742256459637295, "loss": 0.0645, "theoretical_loss": 3.4014516149888054, "tokens_seen": 2458910720 }, { "epoch": 0.75, "learning_rate": 0.00025734232065479057, "loss": 0.0626, "theoretical_loss": 3.401422838202466, "tokens_seen": 2459172864 }, { "epoch": 0.75, "learning_rate": 0.0002572620767132082, "loss": 0.0618, "theoretical_loss": 3.401394065342336, "tokens_seen": 2459435008 }, { "epoch": 0.75, "learning_rate": 0.0002571818327716258, "loss": 0.0661, "theoretical_loss": 3.4013652964074614, "tokens_seen": 2459697152 }, { "epoch": 0.75, "learning_rate": 0.00025710158883004335, "loss": 0.0655, "theoretical_loss": 3.4013365313968893, "tokens_seen": 2459959296 }, { "epoch": 0.75, "learning_rate": 0.0002570213448884609, "loss": 0.0655, "theoretical_loss": 3.4013077703096655, "tokens_seen": 2460221440 }, { "epoch": 0.75, "learning_rate": 0.0002569411009468785, "loss": 0.0649, "theoretical_loss": 3.4012790131448374, "tokens_seen": 2460483584 }, { "epoch": 0.75, "learning_rate": 0.0002568608570052961, "loss": 0.0611, "theoretical_loss": 3.4012502599014516, "tokens_seen": 2460745728 }, { "epoch": 0.75, "learning_rate": 0.0002567806130637137, "loss": 0.0643, "theoretical_loss": 3.4012215105785564, "tokens_seen": 2461007872 }, { "epoch": 0.75, "learning_rate": 0.0002567003691221313, "loss": 0.0642, "theoretical_loss": 3.401192765175199, "tokens_seen": 2461270016 }, { "epoch": 0.75, "learning_rate": 0.0002566201251805489, "loss": 0.0657, "theoretical_loss": 3.4011640236904284, "tokens_seen": 2461532160 }, { "epoch": 0.75, "learning_rate": 0.0002565398812389665, "loss": 0.0618, "theoretical_loss": 3.401135286123293, "tokens_seen": 2461794304 }, { "epoch": 0.75, "learning_rate": 0.00025645963729738403, "loss": 0.0647, "theoretical_loss": 3.401106552472841, "tokens_seen": 2462056448 }, { "epoch": 0.75, "learning_rate": 0.00025637939335580165, "loss": 0.0651, "theoretical_loss": 3.4010778227381233, "tokens_seen": 2462318592 }, { "epoch": 0.75, "learning_rate": 0.0002562991494142192, "loss": 0.0651, "theoretical_loss": 3.4010490969181877, "tokens_seen": 2462580736 }, { "epoch": 0.75, "learning_rate": 0.0002562189054726368, "loss": 0.0664, "theoretical_loss": 3.401020375012085, "tokens_seen": 2462842880 }, { "epoch": 0.75, "learning_rate": 0.00025613866153105443, "loss": 0.066, "theoretical_loss": 3.400991657018865, "tokens_seen": 2463105024 }, { "epoch": 0.75, "learning_rate": 0.000256058417589472, "loss": 0.0661, "theoretical_loss": 3.400962942937579, "tokens_seen": 2463367168 }, { "epoch": 0.75, "learning_rate": 0.0002559781736478896, "loss": 0.0644, "theoretical_loss": 3.400934232767277, "tokens_seen": 2463629312 }, { "epoch": 0.75, "learning_rate": 0.00025589792970630716, "loss": 0.063, "theoretical_loss": 3.400905526507011, "tokens_seen": 2463891456 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.00015025025641079992, "objective/train/docs_used": 896700, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2792147397994995, "objective/train/original_loss": 1.2792147397994995, "objective/train/theoretical_loss": 3.4008768241558323, "objective/train/tokens_used": 2484613600, "objective/train/value_avg": -0.0105133056640625, "objective/train/value_loss": 0.0002328952105017379, "objective/train/value_max": -1.8477439880371094e-05, "objective/train/value_min": -0.70068359375, "objective/train/value_reward_corr": 0.8330814107450704, "objective/train/value_std": 0.023284912109375, "objective/train/weight_avg": 1.000259518623352, "objective/train/weighted_lm_loss": 1.2786136865615845, "objective/train/weights_max": 1.8233647346496582, "objective/train/weights_min": 0.36843419075012207, "theoretical_loss": 3.4008768241558323, "tokens_seen": 2464153600 }, { "epoch": 0.75, "learning_rate": 0.00025581768576472477, "loss": 0.0646, "theoretical_loss": 3.4008768241558323, "tokens_seen": 2464153600 }, { "epoch": 0.75, "learning_rate": 0.00025573744182314233, "loss": 0.066, "theoretical_loss": 3.4008481257127925, "tokens_seen": 2464415744 }, { "epoch": 0.75, "learning_rate": 0.00025565719788155994, "loss": 0.0631, "theoretical_loss": 3.400819431176944, "tokens_seen": 2464677888 }, { "epoch": 0.75, "learning_rate": 0.00025557695393997755, "loss": 0.0621, "theoretical_loss": 3.4007907405473405, "tokens_seen": 2464940032 }, { "epoch": 0.75, "learning_rate": 0.0002554967099983951, "loss": 0.0664, "theoretical_loss": 3.400762053823033, "tokens_seen": 2465202176 }, { "epoch": 0.75, "learning_rate": 0.0002554164660568127, "loss": 0.064, "theoretical_loss": 3.400733371003076, "tokens_seen": 2465464320 }, { "epoch": 0.75, "learning_rate": 0.00025533622211523034, "loss": 0.0665, "theoretical_loss": 3.4007046920865225, "tokens_seen": 2465726464 }, { "epoch": 0.75, "learning_rate": 0.0002552559781736479, "loss": 0.0629, "theoretical_loss": 3.4006760170724264, "tokens_seen": 2465988608 }, { "epoch": 0.75, "learning_rate": 0.00025517573423206546, "loss": 0.0665, "theoretical_loss": 3.4006473459598423, "tokens_seen": 2466250752 }, { "epoch": 0.75, "learning_rate": 0.00025509549029048307, "loss": 0.0662, "theoretical_loss": 3.4006186787478243, "tokens_seen": 2466512896 }, { "epoch": 0.75, "learning_rate": 0.0002550152463489007, "loss": 0.0657, "theoretical_loss": 3.4005900154354274, "tokens_seen": 2466775040 }, { "epoch": 0.75, "learning_rate": 0.00025493500240731824, "loss": 0.0654, "theoretical_loss": 3.400561356021707, "tokens_seen": 2467037184 }, { "epoch": 0.75, "learning_rate": 0.00025485475846573585, "loss": 0.0662, "theoretical_loss": 3.400532700505719, "tokens_seen": 2467299328 }, { "epoch": 0.75, "learning_rate": 0.00025477451452415346, "loss": 0.0668, "theoretical_loss": 3.400504048886518, "tokens_seen": 2467561472 }, { "epoch": 0.75, "learning_rate": 0.000254694270582571, "loss": 0.0644, "theoretical_loss": 3.4004754011631615, "tokens_seen": 2467823616 }, { "epoch": 0.75, "learning_rate": 0.0002546140266409886, "loss": 0.0644, "theoretical_loss": 3.4004467573347053, "tokens_seen": 2468085760 }, { "epoch": 0.75, "learning_rate": 0.0002545337826994062, "loss": 0.0659, "theoretical_loss": 3.400418117400206, "tokens_seen": 2468347904 }, { "epoch": 0.75, "learning_rate": 0.0002544535387578238, "loss": 0.0639, "theoretical_loss": 3.4003894813587223, "tokens_seen": 2468610048 }, { "epoch": 0.75, "learning_rate": 0.00025437329481624136, "loss": 0.0644, "theoretical_loss": 3.4003608492093096, "tokens_seen": 2468872192 }, { "epoch": 0.75, "learning_rate": 0.000254293050874659, "loss": 0.0653, "theoretical_loss": 3.400332220951027, "tokens_seen": 2469134336 }, { "epoch": 0.75, "learning_rate": 0.0002542128069330766, "loss": 0.0651, "theoretical_loss": 3.400303596582933, "tokens_seen": 2469396480 }, { "epoch": 0.75, "learning_rate": 0.0002541325629914941, "loss": 0.0632, "theoretical_loss": 3.4002749761040847, "tokens_seen": 2469658624 }, { "epoch": 0.75, "learning_rate": 0.0002540523190499117, "loss": 0.0645, "theoretical_loss": 3.400246359513542, "tokens_seen": 2469920768 }, { "epoch": 0.75, "learning_rate": 0.0002539720751083293, "loss": 0.0677, "theoretical_loss": 3.4002177468103643, "tokens_seen": 2470182912 }, { "epoch": 0.75, "learning_rate": 0.00025389183116674693, "loss": 0.0646, "theoretical_loss": 3.40018913799361, "tokens_seen": 2470445056 }, { "epoch": 0.75, "objective/train/advantage_avg": -0.000635757576674223, "objective/train/docs_used": 899059, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2872612476348877, "objective/train/original_loss": 1.2872612476348877, "objective/train/theoretical_loss": 3.4001605330623397, "objective/train/tokens_used": 2491167200, "objective/train/value_avg": -0.00960540771484375, "objective/train/value_loss": 0.00023242933093570173, "objective/train/value_max": -2.485513687133789e-05, "objective/train/value_min": -0.96484375, "objective/train/value_reward_corr": 0.8674741291638919, "objective/train/value_std": 0.0236663818359375, "objective/train/weight_avg": 0.9994723200798035, "objective/train/weighted_lm_loss": 1.2871603965759277, "objective/train/weights_max": 2.156646966934204, "objective/train/weights_min": 0.37836602330207825, "theoretical_loss": 3.4001605330623397, "tokens_seen": 2470707200 }, { "epoch": 0.75, "learning_rate": 0.0002538115872251645, "loss": 0.0634, "theoretical_loss": 3.4001605330623397, "tokens_seen": 2470707200 }, { "epoch": 0.75, "learning_rate": 0.0002537313432835821, "loss": 0.0645, "theoretical_loss": 3.400131932015613, "tokens_seen": 2470969344 }, { "epoch": 0.75, "learning_rate": 0.0002536510993419997, "loss": 0.0634, "theoretical_loss": 3.4001033348524907, "tokens_seen": 2471231488 }, { "epoch": 0.75, "learning_rate": 0.0002535708554004173, "loss": 0.0662, "theoretical_loss": 3.4000747415720336, "tokens_seen": 2471493632 }, { "epoch": 0.75, "learning_rate": 0.00025349061145883483, "loss": 0.0673, "theoretical_loss": 3.4000461521733025, "tokens_seen": 2471755776 }, { "epoch": 0.75, "learning_rate": 0.00025341036751725244, "loss": 0.0654, "theoretical_loss": 3.400017566655359, "tokens_seen": 2472017920 }, { "epoch": 0.75, "learning_rate": 0.00025333012357567006, "loss": 0.0648, "theoretical_loss": 3.3999889850172655, "tokens_seen": 2472280064 }, { "epoch": 0.75, "learning_rate": 0.0002532498796340876, "loss": 0.0643, "theoretical_loss": 3.399960407258083, "tokens_seen": 2472542208 }, { "epoch": 0.75, "learning_rate": 0.00025316963569250523, "loss": 0.0646, "theoretical_loss": 3.3999318333768747, "tokens_seen": 2472804352 }, { "epoch": 0.75, "learning_rate": 0.00025308939175092284, "loss": 0.0619, "theoretical_loss": 3.3999032633727024, "tokens_seen": 2473066496 }, { "epoch": 0.75, "learning_rate": 0.0002530091478093404, "loss": 0.0676, "theoretical_loss": 3.3998746972446305, "tokens_seen": 2473328640 }, { "epoch": 0.75, "learning_rate": 0.00025292890386775796, "loss": 0.0673, "theoretical_loss": 3.3998461349917215, "tokens_seen": 2473590784 }, { "epoch": 0.75, "learning_rate": 0.00025284865992617557, "loss": 0.0645, "theoretical_loss": 3.3998175766130396, "tokens_seen": 2473852928 }, { "epoch": 0.75, "learning_rate": 0.0002527684159845932, "loss": 0.0677, "theoretical_loss": 3.399789022107648, "tokens_seen": 2474115072 }, { "epoch": 0.75, "learning_rate": 0.00025268817204301074, "loss": 0.0656, "theoretical_loss": 3.399760471474612, "tokens_seen": 2474377216 }, { "epoch": 0.75, "learning_rate": 0.00025260792810142835, "loss": 0.0652, "theoretical_loss": 3.399731924712996, "tokens_seen": 2474639360 }, { "epoch": 0.75, "learning_rate": 0.00025252768415984597, "loss": 0.0645, "theoretical_loss": 3.3997033818218645, "tokens_seen": 2474901504 }, { "epoch": 0.75, "learning_rate": 0.0002524474402182635, "loss": 0.0676, "theoretical_loss": 3.3996748428002834, "tokens_seen": 2475163648 }, { "epoch": 0.75, "learning_rate": 0.00025236719627668114, "loss": 0.0667, "theoretical_loss": 3.3996463076473185, "tokens_seen": 2475425792 }, { "epoch": 0.75, "learning_rate": 0.0002522869523350987, "loss": 0.0659, "theoretical_loss": 3.399617776362035, "tokens_seen": 2475687936 }, { "epoch": 0.75, "learning_rate": 0.0002522067083935163, "loss": 0.0633, "theoretical_loss": 3.3995892489435007, "tokens_seen": 2475950080 }, { "epoch": 0.75, "learning_rate": 0.00025212646445193387, "loss": 0.065, "theoretical_loss": 3.3995607253907805, "tokens_seen": 2476212224 }, { "epoch": 0.75, "learning_rate": 0.0002520462205103515, "loss": 0.0659, "theoretical_loss": 3.3995322057029425, "tokens_seen": 2476474368 }, { "epoch": 0.75, "learning_rate": 0.0002519659765687691, "loss": 0.0641, "theoretical_loss": 3.3995036898790536, "tokens_seen": 2476736512 }, { "epoch": 0.75, "learning_rate": 0.00025188573262718665, "loss": 0.064, "theoretical_loss": 3.3994751779181813, "tokens_seen": 2476998656 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.000814015802461654, "objective/train/docs_used": 901501, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.241913914680481, "objective/train/original_loss": 1.241913914680481, "objective/train/theoretical_loss": 3.399446669819394, "objective/train/tokens_used": 2497720800, "objective/train/value_avg": -0.007843017578125, "objective/train/value_loss": 0.00017967262829188257, "objective/train/value_max": -4.792213439941406e-05, "objective/train/value_min": -0.46923828125, "objective/train/value_reward_corr": 0.7246901935578405, "objective/train/value_std": 0.01360321044921875, "objective/train/weight_avg": 1.000894546508789, "objective/train/weighted_lm_loss": 1.2434070110321045, "objective/train/weights_max": 1.5335290431976318, "objective/train/weights_min": 0.37751826643943787, "theoretical_loss": 3.399446669819394, "tokens_seen": 2477260800 }, { "epoch": 0.75, "learning_rate": 0.00025180548868560426, "loss": 0.064, "theoretical_loss": 3.399446669819394, "tokens_seen": 2477260800 }, { "epoch": 0.75, "learning_rate": 0.0002517252447440218, "loss": 0.0659, "theoretical_loss": 3.3994181655817592, "tokens_seen": 2477522944 }, { "epoch": 0.75, "learning_rate": 0.0002516450008024394, "loss": 0.0634, "theoretical_loss": 3.3993896652043465, "tokens_seen": 2477785088 }, { "epoch": 0.75, "learning_rate": 0.000251564756860857, "loss": 0.0665, "theoretical_loss": 3.399361168686224, "tokens_seen": 2478047232 }, { "epoch": 0.75, "learning_rate": 0.0002514845129192746, "loss": 0.0654, "theoretical_loss": 3.399332676026461, "tokens_seen": 2478309376 }, { "epoch": 0.75, "learning_rate": 0.0002514042689776922, "loss": 0.0686, "theoretical_loss": 3.399304187224127, "tokens_seen": 2478571520 }, { "epoch": 0.75, "learning_rate": 0.0002513240250361098, "loss": 0.0659, "theoretical_loss": 3.3992757022782927, "tokens_seen": 2478833664 }, { "epoch": 0.75, "learning_rate": 0.0002512437810945274, "loss": 0.0666, "theoretical_loss": 3.3992472211880274, "tokens_seen": 2479095808 }, { "epoch": 0.75, "learning_rate": 0.00025116353715294495, "loss": 0.0667, "theoretical_loss": 3.3992187439524018, "tokens_seen": 2479357952 }, { "epoch": 0.75, "learning_rate": 0.0002510832932113625, "loss": 0.0659, "theoretical_loss": 3.399190270570487, "tokens_seen": 2479620096 }, { "epoch": 0.75, "learning_rate": 0.0002510030492697801, "loss": 0.0666, "theoretical_loss": 3.3991618010413536, "tokens_seen": 2479882240 }, { "epoch": 0.75, "learning_rate": 0.00025092280532819773, "loss": 0.0675, "theoretical_loss": 3.3991333353640742, "tokens_seen": 2480144384 }, { "epoch": 0.75, "learning_rate": 0.00025084256138661534, "loss": 0.0689, "theoretical_loss": 3.3991048735377194, "tokens_seen": 2480406528 }, { "epoch": 0.75, "learning_rate": 0.0002507623174450329, "loss": 0.0664, "theoretical_loss": 3.399076415561362, "tokens_seen": 2480668672 }, { "epoch": 0.75, "learning_rate": 0.0002506820735034505, "loss": 0.0668, "theoretical_loss": 3.399047961434074, "tokens_seen": 2480930816 }, { "epoch": 0.75, "learning_rate": 0.00025060182956186813, "loss": 0.0653, "theoretical_loss": 3.399019511154929, "tokens_seen": 2481192960 }, { "epoch": 0.75, "learning_rate": 0.00025052158562028563, "loss": 0.0647, "theoretical_loss": 3.398991064722999, "tokens_seen": 2481455104 }, { "epoch": 0.75, "learning_rate": 0.00025044134167870324, "loss": 0.0677, "theoretical_loss": 3.3989626221373586, "tokens_seen": 2481717248 }, { "epoch": 0.75, "learning_rate": 0.00025036109773712086, "loss": 0.0657, "theoretical_loss": 3.3989341833970803, "tokens_seen": 2481979392 }, { "epoch": 0.75, "learning_rate": 0.00025028085379553847, "loss": 0.0641, "theoretical_loss": 3.398905748501239, "tokens_seen": 2482241536 }, { "epoch": 0.75, "learning_rate": 0.00025020060985395603, "loss": 0.0657, "theoretical_loss": 3.3988773174489086, "tokens_seen": 2482503680 }, { "epoch": 0.75, "learning_rate": 0.00025012036591237364, "loss": 0.0668, "theoretical_loss": 3.3988488902391643, "tokens_seen": 2482765824 }, { "epoch": 0.75, "learning_rate": 0.00025004012197079125, "loss": 0.0651, "theoretical_loss": 3.3988204668710806, "tokens_seen": 2483027968 }, { "epoch": 0.75, "learning_rate": 0.0002499598780292088, "loss": 0.0647, "theoretical_loss": 3.398792047343733, "tokens_seen": 2483290112 }, { "epoch": 0.75, "learning_rate": 0.0002498796340876264, "loss": 0.0669, "theoretical_loss": 3.3987636316561978, "tokens_seen": 2483552256 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.0003351647756062448, "objective/train/docs_used": 903880, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3003116846084595, "objective/train/original_loss": 1.30031156539917, "objective/train/theoretical_loss": 3.3987352198075502, "objective/train/tokens_used": 2504274400, "objective/train/value_avg": -0.00673675537109375, "objective/train/value_loss": 0.00015308342699427158, "objective/train/value_max": -2.6285648345947266e-05, "objective/train/value_min": -0.38037109375, "objective/train/value_reward_corr": 0.7115446243621768, "objective/train/value_std": 0.01253509521484375, "objective/train/weight_avg": 1.0004048347473145, "objective/train/weighted_lm_loss": 1.3008301258087158, "objective/train/weights_max": 1.1297332048416138, "objective/train/weights_min": 0.3682253658771515, "theoretical_loss": 3.3987352198075502, "tokens_seen": 2483814400 }, { "epoch": 0.75, "learning_rate": 0.000249799390146044, "loss": 0.0664, "theoretical_loss": 3.3987352198075502, "tokens_seen": 2483814400 }, { "epoch": 0.75, "learning_rate": 0.00024971914620446154, "loss": 0.0668, "theoretical_loss": 3.3987068117968664, "tokens_seen": 2484076544 }, { "epoch": 0.75, "learning_rate": 0.00024963890226287915, "loss": 0.0634, "theoretical_loss": 3.3986784076232235, "tokens_seen": 2484338688 }, { "epoch": 0.75, "learning_rate": 0.00024955865832129677, "loss": 0.0625, "theoretical_loss": 3.3986500072856987, "tokens_seen": 2484600832 }, { "epoch": 0.75, "learning_rate": 0.0002494784143797143, "loss": 0.0642, "theoretical_loss": 3.398621610783368, "tokens_seen": 2484862976 }, { "epoch": 0.75, "learning_rate": 0.00024939817043813194, "loss": 0.0665, "theoretical_loss": 3.3985932181153107, "tokens_seen": 2485125120 }, { "epoch": 0.75, "learning_rate": 0.00024931792649654955, "loss": 0.0655, "theoretical_loss": 3.398564829280603, "tokens_seen": 2485387264 }, { "epoch": 0.75, "learning_rate": 0.0002492376825549671, "loss": 0.0648, "theoretical_loss": 3.3985364442783244, "tokens_seen": 2485649408 }, { "epoch": 0.75, "learning_rate": 0.00024915743861338467, "loss": 0.0655, "theoretical_loss": 3.398508063107553, "tokens_seen": 2485911552 }, { "epoch": 0.75, "learning_rate": 0.0002490771946718023, "loss": 0.0662, "theoretical_loss": 3.3984796857673674, "tokens_seen": 2486173696 }, { "epoch": 0.75, "learning_rate": 0.0002489969507302199, "loss": 0.0655, "theoretical_loss": 3.3984513122568467, "tokens_seen": 2486435840 }, { "epoch": 0.75, "learning_rate": 0.00024891670678863745, "loss": 0.0676, "theoretical_loss": 3.3984229425750714, "tokens_seen": 2486697984 }, { "epoch": 0.75, "learning_rate": 0.00024883646284705506, "loss": 0.0654, "theoretical_loss": 3.3983945767211203, "tokens_seen": 2486960128 }, { "epoch": 0.75, "learning_rate": 0.0002487562189054726, "loss": 0.066, "theoretical_loss": 3.3983662146940734, "tokens_seen": 2487222272 }, { "epoch": 0.75, "learning_rate": 0.00024867597496389023, "loss": 0.0661, "theoretical_loss": 3.398337856493012, "tokens_seen": 2487484416 }, { "epoch": 0.75, "learning_rate": 0.0002485957310223078, "loss": 0.0664, "theoretical_loss": 3.3983095021170158, "tokens_seen": 2487746560 }, { "epoch": 0.75, "learning_rate": 0.0002485154870807254, "loss": 0.0663, "theoretical_loss": 3.398281151565167, "tokens_seen": 2488008704 }, { "epoch": 0.75, "learning_rate": 0.000248435243139143, "loss": 0.0694, "theoretical_loss": 3.398252804836546, "tokens_seen": 2488270848 }, { "epoch": 0.75, "learning_rate": 0.0002483549991975606, "loss": 0.0663, "theoretical_loss": 3.3982244619302358, "tokens_seen": 2488532992 }, { "epoch": 0.75, "learning_rate": 0.0002482747552559782, "loss": 0.0678, "theoretical_loss": 3.398196122845317, "tokens_seen": 2488795136 }, { "epoch": 0.75, "learning_rate": 0.00024819451131439575, "loss": 0.0672, "theoretical_loss": 3.3981677875808725, "tokens_seen": 2489057280 }, { "epoch": 0.75, "learning_rate": 0.00024811426737281336, "loss": 0.065, "theoretical_loss": 3.3981394561359854, "tokens_seen": 2489319424 }, { "epoch": 0.75, "learning_rate": 0.0002480340234312309, "loss": 0.0682, "theoretical_loss": 3.398111128509738, "tokens_seen": 2489581568 }, { "epoch": 0.75, "learning_rate": 0.00024795377948964853, "loss": 0.065, "theoretical_loss": 3.3980828047012146, "tokens_seen": 2489843712 }, { "epoch": 0.75, "learning_rate": 0.00024787353554806614, "loss": 0.0638, "theoretical_loss": 3.3980544847094976, "tokens_seen": 2490105856 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.0007647961028851569, "objective/train/docs_used": 906478, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3144490718841553, "objective/train/original_loss": 1.3144491910934448, "objective/train/theoretical_loss": 3.3980261685336712, "objective/train/tokens_used": 2510828000, "objective/train/value_avg": -0.00479888916015625, "objective/train/value_loss": 0.00020321377087384462, "objective/train/value_max": -2.3365020751953125e-05, "objective/train/value_min": -0.38916015625, "objective/train/value_reward_corr": 0.5756124228481705, "objective/train/value_std": 0.00930023193359375, "objective/train/weight_avg": 1.0008529424667358, "objective/train/weighted_lm_loss": 1.315671682357788, "objective/train/weights_max": 1.3893797397613525, "objective/train/weights_min": 0.3708355128765106, "theoretical_loss": 3.3980261685336712, "tokens_seen": 2490368000 }, { "epoch": 0.75, "learning_rate": 0.0002477932916064837, "loss": 0.0655, "theoretical_loss": 3.3980261685336712, "tokens_seen": 2490368000 }, { "epoch": 0.75, "learning_rate": 0.0002477130476649013, "loss": 0.0635, "theoretical_loss": 3.39799785617282, "tokens_seen": 2490630144 }, { "epoch": 0.75, "learning_rate": 0.00024763280372331887, "loss": 0.0664, "theoretical_loss": 3.3979695476260288, "tokens_seen": 2490892288 }, { "epoch": 0.75, "learning_rate": 0.0002475525597817365, "loss": 0.0668, "theoretical_loss": 3.397941242892382, "tokens_seen": 2491154432 }, { "epoch": 0.76, "learning_rate": 0.00024747231584015404, "loss": 0.0697, "theoretical_loss": 3.397912941970965, "tokens_seen": 2491416576 }, { "epoch": 0.76, "learning_rate": 0.00024739207189857166, "loss": 0.0677, "theoretical_loss": 3.3978846448608633, "tokens_seen": 2491678720 }, { "epoch": 0.76, "learning_rate": 0.00024731182795698927, "loss": 0.0669, "theoretical_loss": 3.3978563515611624, "tokens_seen": 2491940864 }, { "epoch": 0.76, "learning_rate": 0.00024723158401540683, "loss": 0.0674, "theoretical_loss": 3.397828062070949, "tokens_seen": 2492203008 }, { "epoch": 0.76, "learning_rate": 0.00024715134007382444, "loss": 0.0639, "theoretical_loss": 3.3977997763893093, "tokens_seen": 2492465152 }, { "epoch": 0.76, "learning_rate": 0.000247071096132242, "loss": 0.069, "theoretical_loss": 3.3977714945153297, "tokens_seen": 2492727296 }, { "epoch": 0.76, "learning_rate": 0.0002469908521906596, "loss": 0.0653, "theoretical_loss": 3.397743216448098, "tokens_seen": 2492989440 }, { "epoch": 0.76, "learning_rate": 0.0002469106082490772, "loss": 0.0669, "theoretical_loss": 3.3977149421867012, "tokens_seen": 2493251584 }, { "epoch": 0.76, "learning_rate": 0.0002468303643074948, "loss": 0.0684, "theoretical_loss": 3.397686671730227, "tokens_seen": 2493513728 }, { "epoch": 0.76, "learning_rate": 0.0002467501203659124, "loss": 0.0656, "theoretical_loss": 3.3976584050777636, "tokens_seen": 2493775872 }, { "epoch": 0.76, "learning_rate": 0.00024666987642432995, "loss": 0.0643, "theoretical_loss": 3.397630142228399, "tokens_seen": 2494038016 }, { "epoch": 0.76, "learning_rate": 0.00024658963248274757, "loss": 0.0657, "theoretical_loss": 3.3976018831812222, "tokens_seen": 2494300160 }, { "epoch": 0.76, "learning_rate": 0.0002465093885411651, "loss": 0.0661, "theoretical_loss": 3.3975736279353224, "tokens_seen": 2494562304 }, { "epoch": 0.76, "learning_rate": 0.00024642914459958274, "loss": 0.0628, "theoretical_loss": 3.3975453764897883, "tokens_seen": 2494824448 }, { "epoch": 0.76, "learning_rate": 0.00024634890065800035, "loss": 0.0651, "theoretical_loss": 3.3975171288437096, "tokens_seen": 2495086592 }, { "epoch": 0.76, "learning_rate": 0.0002462686567164179, "loss": 0.0644, "theoretical_loss": 3.397488884996177, "tokens_seen": 2495348736 }, { "epoch": 0.76, "learning_rate": 0.0002461884127748355, "loss": 0.0658, "theoretical_loss": 3.3974606449462796, "tokens_seen": 2495610880 }, { "epoch": 0.76, "learning_rate": 0.0002461081688332531, "loss": 0.0655, "theoretical_loss": 3.3974324086931085, "tokens_seen": 2495873024 }, { "epoch": 0.76, "learning_rate": 0.0002460279248916707, "loss": 0.0662, "theoretical_loss": 3.3974041762357547, "tokens_seen": 2496135168 }, { "epoch": 0.76, "learning_rate": 0.00024594768095008825, "loss": 0.0658, "theoretical_loss": 3.397375947573309, "tokens_seen": 2496397312 }, { "epoch": 0.76, "learning_rate": 0.00024586743700850586, "loss": 0.0647, "theoretical_loss": 3.397347722704863, "tokens_seen": 2496659456 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.0009059284930117428, "objective/train/docs_used": 908925, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2855145931243896, "objective/train/original_loss": 1.2855145931243896, "objective/train/theoretical_loss": 3.397319501629509, "objective/train/tokens_used": 2517381600, "objective/train/value_avg": -0.00823974609375, "objective/train/value_loss": 0.00019895529840141535, "objective/train/value_max": -2.86102294921875e-05, "objective/train/value_min": -0.343994140625, "objective/train/value_reward_corr": 0.6927096300481003, "objective/train/value_std": 0.01464080810546875, "objective/train/weight_avg": 1.000998854637146, "objective/train/weighted_lm_loss": 1.2875512838363647, "objective/train/weights_max": 1.373934268951416, "objective/train/weights_min": 0.3924909830093384, "theoretical_loss": 3.397319501629509, "tokens_seen": 2496921600 }, { "epoch": 0.76, "learning_rate": 0.0002457871930669235, "loss": 0.0664, "theoretical_loss": 3.397319501629509, "tokens_seen": 2496921600 }, { "epoch": 0.76, "learning_rate": 0.00024570694912534103, "loss": 0.0645, "theoretical_loss": 3.397291284346338, "tokens_seen": 2497183744 }, { "epoch": 0.76, "learning_rate": 0.00024562670518375865, "loss": 0.0652, "theoretical_loss": 3.397263070854444, "tokens_seen": 2497445888 }, { "epoch": 0.76, "learning_rate": 0.0002455464612421762, "loss": 0.066, "theoretical_loss": 3.397234861152918, "tokens_seen": 2497708032 }, { "epoch": 0.76, "learning_rate": 0.0002454662173005938, "loss": 0.066, "theoretical_loss": 3.397206655240854, "tokens_seen": 2497970176 }, { "epoch": 0.76, "learning_rate": 0.0002453859733590114, "loss": 0.0639, "theoretical_loss": 3.3971784531173457, "tokens_seen": 2498232320 }, { "epoch": 0.76, "learning_rate": 0.000245305729417429, "loss": 0.0638, "theoretical_loss": 3.397150254781486, "tokens_seen": 2498494464 }, { "epoch": 0.76, "learning_rate": 0.0002452254854758466, "loss": 0.0672, "theoretical_loss": 3.3971220602323693, "tokens_seen": 2498756608 }, { "epoch": 0.76, "learning_rate": 0.00024514524153426416, "loss": 0.0666, "theoretical_loss": 3.3970938694690895, "tokens_seen": 2499018752 }, { "epoch": 0.76, "learning_rate": 0.00024506499759268177, "loss": 0.0636, "theoretical_loss": 3.3970656824907417, "tokens_seen": 2499280896 }, { "epoch": 0.76, "learning_rate": 0.00024498475365109933, "loss": 0.0649, "theoretical_loss": 3.3970374992964207, "tokens_seen": 2499543040 }, { "epoch": 0.76, "learning_rate": 0.00024490450970951694, "loss": 0.0664, "theoretical_loss": 3.3970093198852216, "tokens_seen": 2499805184 }, { "epoch": 0.76, "learning_rate": 0.00024482426576793456, "loss": 0.0669, "theoretical_loss": 3.3969811442562396, "tokens_seen": 2500067328 }, { "epoch": 0.76, "learning_rate": 0.0002447440218263521, "loss": 0.0628, "theoretical_loss": 3.396952972408571, "tokens_seen": 2500329472 }, { "epoch": 0.76, "learning_rate": 0.0002446637778847697, "loss": 0.0656, "theoretical_loss": 3.3969248043413125, "tokens_seen": 2500591616 }, { "epoch": 0.76, "learning_rate": 0.0002445835339431873, "loss": 0.0642, "theoretical_loss": 3.3968966400535594, "tokens_seen": 2500853760 }, { "epoch": 0.76, "learning_rate": 0.0002445032900016049, "loss": 0.0647, "theoretical_loss": 3.396868479544409, "tokens_seen": 2501115904 }, { "epoch": 0.76, "learning_rate": 0.00024442304606002246, "loss": 0.0629, "theoretical_loss": 3.3968403228129587, "tokens_seen": 2501378048 }, { "epoch": 0.76, "learning_rate": 0.00024434280211844007, "loss": 0.0613, "theoretical_loss": 3.396812169858306, "tokens_seen": 2501640192 }, { "epoch": 0.76, "learning_rate": 0.0002442625581768577, "loss": 0.0648, "theoretical_loss": 3.396784020679547, "tokens_seen": 2501902336 }, { "epoch": 0.76, "learning_rate": 0.00024418231423527524, "loss": 0.0661, "theoretical_loss": 3.3967558752757823, "tokens_seen": 2502164480 }, { "epoch": 0.76, "learning_rate": 0.00024410207029369282, "loss": 0.0682, "theoretical_loss": 3.396727733646108, "tokens_seen": 2502426624 }, { "epoch": 0.76, "learning_rate": 0.00024402182635211044, "loss": 0.0652, "theoretical_loss": 3.396699595789624, "tokens_seen": 2502688768 }, { "epoch": 0.76, "learning_rate": 0.00024394158241052802, "loss": 0.0667, "theoretical_loss": 3.396671461705429, "tokens_seen": 2502950912 }, { "epoch": 0.76, "learning_rate": 0.00024386133846894558, "loss": 0.0652, "theoretical_loss": 3.396643331392622, "tokens_seen": 2503213056 }, { "epoch": 0.76, "objective/train/advantage_avg": 3.1022223993204534e-05, "objective/train/docs_used": 911421, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.350574254989624, "objective/train/original_loss": 1.350574254989624, "objective/train/theoretical_loss": 3.3966152048503027, "objective/train/tokens_used": 2523935200, "objective/train/value_avg": -0.0098724365234375, "objective/train/value_loss": 0.00019183217955287546, "objective/train/value_max": -5.435943603515625e-05, "objective/train/value_min": -0.349365234375, "objective/train/value_reward_corr": 0.8088936830272246, "objective/train/value_std": 0.019378662109375, "objective/train/weight_avg": 1.0001190900802612, "objective/train/weighted_lm_loss": 1.3499020338058472, "objective/train/weights_max": 1.1969894170761108, "objective/train/weights_min": 0.3697309195995331, "theoretical_loss": 3.3966152048503027, "tokens_seen": 2503475200 }, { "epoch": 0.76, "learning_rate": 0.0002437810945273632, "loss": 0.0666, "theoretical_loss": 3.3966152048503027, "tokens_seen": 2503475200 }, { "epoch": 0.76, "learning_rate": 0.00024370085058578078, "loss": 0.0629, "theoretical_loss": 3.396587082077571, "tokens_seen": 2503737344 }, { "epoch": 0.76, "learning_rate": 0.00024362060664419836, "loss": 0.0672, "theoretical_loss": 3.3965589630735273, "tokens_seen": 2503999488 }, { "epoch": 0.76, "learning_rate": 0.00024354036270261595, "loss": 0.0622, "theoretical_loss": 3.3965308478372718, "tokens_seen": 2504261632 }, { "epoch": 0.76, "learning_rate": 0.00024346011876103356, "loss": 0.0652, "theoretical_loss": 3.3965027363679052, "tokens_seen": 2504523776 }, { "epoch": 0.76, "learning_rate": 0.00024337987481945115, "loss": 0.0652, "theoretical_loss": 3.3964746286645293, "tokens_seen": 2504785920 }, { "epoch": 0.76, "learning_rate": 0.0002432996308778687, "loss": 0.0631, "theoretical_loss": 3.3964465247262448, "tokens_seen": 2505048064 }, { "epoch": 0.76, "learning_rate": 0.00024321938693628632, "loss": 0.0649, "theoretical_loss": 3.3964184245521536, "tokens_seen": 2505310208 }, { "epoch": 0.76, "learning_rate": 0.0002431391429947039, "loss": 0.0674, "theoretical_loss": 3.396390328141358, "tokens_seen": 2505572352 }, { "epoch": 0.76, "learning_rate": 0.00024305889905312152, "loss": 0.0655, "theoretical_loss": 3.3963622354929597, "tokens_seen": 2505834496 }, { "epoch": 0.76, "learning_rate": 0.00024297865511153908, "loss": 0.0653, "theoretical_loss": 3.396334146606062, "tokens_seen": 2506096640 }, { "epoch": 0.76, "learning_rate": 0.00024289841116995666, "loss": 0.0633, "theoretical_loss": 3.396306061479768, "tokens_seen": 2506358784 }, { "epoch": 0.76, "learning_rate": 0.00024281816722837427, "loss": 0.0671, "theoretical_loss": 3.3962779801131804, "tokens_seen": 2506620928 }, { "epoch": 0.76, "learning_rate": 0.00024273792328679183, "loss": 0.0667, "theoretical_loss": 3.3962499025054034, "tokens_seen": 2506883072 }, { "epoch": 0.76, "learning_rate": 0.00024265767934520945, "loss": 0.0637, "theoretical_loss": 3.3962218286555403, "tokens_seen": 2507145216 }, { "epoch": 0.76, "learning_rate": 0.00024257743540362703, "loss": 0.0624, "theoretical_loss": 3.3961937585626956, "tokens_seen": 2507407360 }, { "epoch": 0.76, "learning_rate": 0.00024249719146204464, "loss": 0.0655, "theoretical_loss": 3.3961656922259738, "tokens_seen": 2507669504 }, { "epoch": 0.76, "learning_rate": 0.0002424169475204622, "loss": 0.0664, "theoretical_loss": 3.3961376296444796, "tokens_seen": 2507931648 }, { "epoch": 0.76, "learning_rate": 0.0002423367035788798, "loss": 0.0665, "theoretical_loss": 3.396109570817318, "tokens_seen": 2508193792 }, { "epoch": 0.76, "learning_rate": 0.0002422564596372974, "loss": 0.0655, "theoretical_loss": 3.3960815157435946, "tokens_seen": 2508455936 }, { "epoch": 0.76, "learning_rate": 0.00024217621569571499, "loss": 0.0652, "theoretical_loss": 3.3960534644224154, "tokens_seen": 2508718080 }, { "epoch": 0.76, "learning_rate": 0.00024209597175413257, "loss": 0.0671, "theoretical_loss": 3.3960254168528863, "tokens_seen": 2508980224 }, { "epoch": 0.76, "learning_rate": 0.00024201572781255016, "loss": 0.0622, "theoretical_loss": 3.395997373034113, "tokens_seen": 2509242368 }, { "epoch": 0.76, "learning_rate": 0.00024193548387096774, "loss": 0.0685, "theoretical_loss": 3.395969332965203, "tokens_seen": 2509504512 }, { "epoch": 0.76, "learning_rate": 0.00024185523992938533, "loss": 0.0648, "theoretical_loss": 3.3959412966452627, "tokens_seen": 2509766656 }, { "epoch": 0.76, "objective/train/advantage_avg": -0.0003518318699207157, "objective/train/docs_used": 913305, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.370742917060852, "objective/train/original_loss": 1.370742917060852, "objective/train/theoretical_loss": 3.3959132640733998, "objective/train/tokens_used": 2530488800, "objective/train/value_avg": -0.00812530517578125, "objective/train/value_loss": 0.0003890729567501694, "objective/train/value_max": -1.722574234008789e-05, "objective/train/value_min": -0.91796875, "objective/train/value_reward_corr": 0.7049164524146272, "objective/train/value_std": 0.0178070068359375, "objective/train/weight_avg": 0.9998188614845276, "objective/train/weighted_lm_loss": 1.3701258897781372, "objective/train/weights_max": 1.7972010374069214, "objective/train/weights_min": 0.36976051330566406, "theoretical_loss": 3.3959132640733998, "tokens_seen": 2510028800 }, { "epoch": 0.76, "learning_rate": 0.0002417749959878029, "loss": 0.0657, "theoretical_loss": 3.3959132640733998, "tokens_seen": 2510028800 }, { "epoch": 0.76, "learning_rate": 0.00024169475204622053, "loss": 0.0669, "theoretical_loss": 3.395885235248721, "tokens_seen": 2510290944 }, { "epoch": 0.76, "learning_rate": 0.0002416145081046381, "loss": 0.0672, "theoretical_loss": 3.3958572101703353, "tokens_seen": 2510553088 }, { "epoch": 0.76, "learning_rate": 0.0002415342641630557, "loss": 0.0627, "theoretical_loss": 3.3958291888373506, "tokens_seen": 2510815232 }, { "epoch": 0.76, "learning_rate": 0.00024145402022147328, "loss": 0.0654, "theoretical_loss": 3.3958011712488747, "tokens_seen": 2511077376 }, { "epoch": 0.76, "learning_rate": 0.00024137377627989087, "loss": 0.0668, "theoretical_loss": 3.395773157404017, "tokens_seen": 2511339520 }, { "epoch": 0.76, "learning_rate": 0.00024129353233830848, "loss": 0.069, "theoretical_loss": 3.395745147301887, "tokens_seen": 2511601664 }, { "epoch": 0.76, "learning_rate": 0.00024121328839672604, "loss": 0.0661, "theoretical_loss": 3.395717140941593, "tokens_seen": 2511863808 }, { "epoch": 0.76, "learning_rate": 0.00024113304445514365, "loss": 0.0668, "theoretical_loss": 3.395689138322245, "tokens_seen": 2512125952 }, { "epoch": 0.76, "learning_rate": 0.00024105280051356124, "loss": 0.0655, "theoretical_loss": 3.3956611394429537, "tokens_seen": 2512388096 }, { "epoch": 0.76, "learning_rate": 0.0002409725565719788, "loss": 0.0639, "theoretical_loss": 3.395633144302829, "tokens_seen": 2512650240 }, { "epoch": 0.76, "learning_rate": 0.0002408923126303964, "loss": 0.0655, "theoretical_loss": 3.3956051529009814, "tokens_seen": 2512912384 }, { "epoch": 0.76, "learning_rate": 0.000240812068688814, "loss": 0.0654, "theoretical_loss": 3.3955771652365216, "tokens_seen": 2513174528 }, { "epoch": 0.76, "learning_rate": 0.0002407318247472316, "loss": 0.0667, "theoretical_loss": 3.395549181308562, "tokens_seen": 2513436672 }, { "epoch": 0.76, "learning_rate": 0.00024065158080564916, "loss": 0.0682, "theoretical_loss": 3.3955212011162126, "tokens_seen": 2513698816 }, { "epoch": 0.76, "learning_rate": 0.00024057133686406678, "loss": 0.0655, "theoretical_loss": 3.395493224658586, "tokens_seen": 2513960960 }, { "epoch": 0.76, "learning_rate": 0.00024049109292248436, "loss": 0.0627, "theoretical_loss": 3.395465251934794, "tokens_seen": 2514223104 }, { "epoch": 0.76, "learning_rate": 0.00024041084898090195, "loss": 0.0636, "theoretical_loss": 3.39543728294395, "tokens_seen": 2514485248 }, { "epoch": 0.76, "learning_rate": 0.00024033060503931953, "loss": 0.0631, "theoretical_loss": 3.395409317685165, "tokens_seen": 2514747392 }, { "epoch": 0.76, "learning_rate": 0.00024025036109773712, "loss": 0.0661, "theoretical_loss": 3.395381356157554, "tokens_seen": 2515009536 }, { "epoch": 0.76, "learning_rate": 0.00024017011715615473, "loss": 0.0676, "theoretical_loss": 3.3953533983602293, "tokens_seen": 2515271680 }, { "epoch": 0.76, "learning_rate": 0.00024008987321457232, "loss": 0.066, "theoretical_loss": 3.3953254442923044, "tokens_seen": 2515533824 }, { "epoch": 0.76, "learning_rate": 0.00024000962927298988, "loss": 0.0672, "theoretical_loss": 3.3952974939528935, "tokens_seen": 2515795968 }, { "epoch": 0.76, "learning_rate": 0.0002399293853314075, "loss": 0.0641, "theoretical_loss": 3.395269547341111, "tokens_seen": 2516058112 }, { "epoch": 0.76, "learning_rate": 0.00023984914138982507, "loss": 0.0646, "theoretical_loss": 3.3952416044560714, "tokens_seen": 2516320256 }, { "epoch": 0.76, "objective/train/advantage_avg": -0.0005290715489536524, "objective/train/docs_used": 916228, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3841131925582886, "objective/train/original_loss": 1.384113073348999, "objective/train/theoretical_loss": 3.3952136652968896, "objective/train/tokens_used": 2537042400, "objective/train/value_avg": -0.0087890625, "objective/train/value_loss": 0.0003163471119478345, "objective/train/value_max": -3.5643577575683594e-05, "objective/train/value_min": -0.72412109375, "objective/train/value_reward_corr": 0.6784622902242732, "objective/train/value_std": 0.0157012939453125, "objective/train/weight_avg": 0.9996117353439331, "objective/train/weighted_lm_loss": 1.383963942527771, "objective/train/weights_max": 1.3841378688812256, "objective/train/weights_min": 0.3704989552497864, "theoretical_loss": 3.3952136652968896, "tokens_seen": 2516582400 }, { "epoch": 0.76, "learning_rate": 0.00023976889744824266, "loss": 0.0687, "theoretical_loss": 3.3952136652968896, "tokens_seen": 2516582400 }, { "epoch": 0.76, "learning_rate": 0.00023968865350666024, "loss": 0.0652, "theoretical_loss": 3.3951857298626806, "tokens_seen": 2516844544 }, { "epoch": 0.76, "learning_rate": 0.00023960840956507786, "loss": 0.0649, "theoretical_loss": 3.3951577981525602, "tokens_seen": 2517106688 }, { "epoch": 0.76, "learning_rate": 0.00023952816562349544, "loss": 0.0657, "theoretical_loss": 3.395129870165644, "tokens_seen": 2517368832 }, { "epoch": 0.76, "learning_rate": 0.000239447921681913, "loss": 0.0666, "theoretical_loss": 3.3951019459010476, "tokens_seen": 2517630976 }, { "epoch": 0.76, "learning_rate": 0.0002393676777403306, "loss": 0.0636, "theoretical_loss": 3.3950740253578875, "tokens_seen": 2517893120 }, { "epoch": 0.76, "learning_rate": 0.0002392874337987482, "loss": 0.0651, "theoretical_loss": 3.3950461085352814, "tokens_seen": 2518155264 }, { "epoch": 0.76, "learning_rate": 0.0002392071898571658, "loss": 0.0659, "theoretical_loss": 3.3950181954323453, "tokens_seen": 2518417408 }, { "epoch": 0.76, "learning_rate": 0.00023912694591558337, "loss": 0.0654, "theoretical_loss": 3.3949902860481966, "tokens_seen": 2518679552 }, { "epoch": 0.76, "learning_rate": 0.00023904670197400096, "loss": 0.0655, "theoretical_loss": 3.394962380381953, "tokens_seen": 2518941696 }, { "epoch": 0.76, "learning_rate": 0.00023896645803241857, "loss": 0.0659, "theoretical_loss": 3.3949344784327327, "tokens_seen": 2519203840 }, { "epoch": 0.76, "learning_rate": 0.00023888621409083613, "loss": 0.0663, "theoretical_loss": 3.3949065801996534, "tokens_seen": 2519465984 }, { "epoch": 0.76, "learning_rate": 0.00023880597014925374, "loss": 0.0631, "theoretical_loss": 3.3948786856818334, "tokens_seen": 2519728128 }, { "epoch": 0.76, "learning_rate": 0.00023872572620767132, "loss": 0.0645, "theoretical_loss": 3.3948507948783924, "tokens_seen": 2519990272 }, { "epoch": 0.76, "learning_rate": 0.00023864548226608894, "loss": 0.0634, "theoretical_loss": 3.3948229077884484, "tokens_seen": 2520252416 }, { "epoch": 0.76, "learning_rate": 0.0002385652383245065, "loss": 0.0659, "theoretical_loss": 3.394795024411122, "tokens_seen": 2520514560 }, { "epoch": 0.76, "learning_rate": 0.00023848499438292408, "loss": 0.0665, "theoretical_loss": 3.394767144745532, "tokens_seen": 2520776704 }, { "epoch": 0.76, "learning_rate": 0.0002384047504413417, "loss": 0.0691, "theoretical_loss": 3.394739268790798, "tokens_seen": 2521038848 }, { "epoch": 0.76, "learning_rate": 0.00023832450649975928, "loss": 0.0644, "theoretical_loss": 3.3947113965460414, "tokens_seen": 2521300992 }, { "epoch": 0.76, "learning_rate": 0.00023824426255817686, "loss": 0.0674, "theoretical_loss": 3.3946835280103826, "tokens_seen": 2521563136 }, { "epoch": 0.76, "learning_rate": 0.00023816401861659445, "loss": 0.066, "theoretical_loss": 3.3946556631829417, "tokens_seen": 2521825280 }, { "epoch": 0.76, "learning_rate": 0.00023808377467501204, "loss": 0.0668, "theoretical_loss": 3.3946278020628404, "tokens_seen": 2522087424 }, { "epoch": 0.76, "learning_rate": 0.00023800353073342962, "loss": 0.0641, "theoretical_loss": 3.3945999446492, "tokens_seen": 2522349568 }, { "epoch": 0.76, "learning_rate": 0.0002379232867918472, "loss": 0.0652, "theoretical_loss": 3.3945720909411428, "tokens_seen": 2522611712 }, { "epoch": 0.76, "learning_rate": 0.00023784304285026482, "loss": 0.0654, "theoretical_loss": 3.3945442409377904, "tokens_seen": 2522873856 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.0008710318361409009, "objective/train/docs_used": 918647, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.381752371788025, "objective/train/original_loss": 1.3817524909973145, "objective/train/theoretical_loss": 3.394516394638265, "objective/train/tokens_used": 2543596000, "objective/train/value_avg": -0.0074615478515625, "objective/train/value_loss": 0.00024307436251547188, "objective/train/value_max": -4.13060188293457e-05, "objective/train/value_min": -0.70166015625, "objective/train/value_reward_corr": 0.6666992631073855, "objective/train/value_std": 0.013763427734375, "objective/train/weight_avg": 1.0009773969650269, "objective/train/weighted_lm_loss": 1.382368803024292, "objective/train/weights_max": 1.411948561668396, "objective/train/weights_min": 0.3690214455127716, "theoretical_loss": 3.394516394638265, "tokens_seen": 2523136000 }, { "epoch": 0.76, "learning_rate": 0.0002377627989086824, "loss": 0.0673, "theoretical_loss": 3.394516394638265, "tokens_seen": 2523136000 }, { "epoch": 0.76, "learning_rate": 0.0002376825549671, "loss": 0.0643, "theoretical_loss": 3.3944885520416896, "tokens_seen": 2523398144 }, { "epoch": 0.76, "learning_rate": 0.00023760231102551758, "loss": 0.0682, "theoretical_loss": 3.3944607131471876, "tokens_seen": 2523660288 }, { "epoch": 0.76, "learning_rate": 0.00023752206708393516, "loss": 0.0677, "theoretical_loss": 3.3944328779538813, "tokens_seen": 2523922432 }, { "epoch": 0.76, "learning_rate": 0.00023744182314235277, "loss": 0.0639, "theoretical_loss": 3.3944050464608955, "tokens_seen": 2524184576 }, { "epoch": 0.77, "learning_rate": 0.00023736157920077033, "loss": 0.0653, "theoretical_loss": 3.394377218667353, "tokens_seen": 2524446720 }, { "epoch": 0.77, "learning_rate": 0.00023728133525918795, "loss": 0.0628, "theoretical_loss": 3.3943493945723784, "tokens_seen": 2524708864 }, { "epoch": 0.77, "learning_rate": 0.00023720109131760553, "loss": 0.0677, "theoretical_loss": 3.394321574175096, "tokens_seen": 2524971008 }, { "epoch": 0.77, "learning_rate": 0.0002371208473760231, "loss": 0.0655, "theoretical_loss": 3.394293757474631, "tokens_seen": 2525233152 }, { "epoch": 0.77, "learning_rate": 0.0002370406034344407, "loss": 0.0682, "theoretical_loss": 3.394265944470108, "tokens_seen": 2525495296 }, { "epoch": 0.77, "learning_rate": 0.0002369603594928583, "loss": 0.0629, "theoretical_loss": 3.3942381351606525, "tokens_seen": 2525757440 }, { "epoch": 0.77, "learning_rate": 0.0002368801155512759, "loss": 0.067, "theoretical_loss": 3.3942103295453903, "tokens_seen": 2526019584 }, { "epoch": 0.77, "learning_rate": 0.00023679987160969346, "loss": 0.0637, "theoretical_loss": 3.394182527623448, "tokens_seen": 2526281728 }, { "epoch": 0.77, "learning_rate": 0.00023671962766811107, "loss": 0.0677, "theoretical_loss": 3.39415472939395, "tokens_seen": 2526543872 }, { "epoch": 0.77, "learning_rate": 0.00023663938372652866, "loss": 0.0646, "theoretical_loss": 3.3941269348560246, "tokens_seen": 2526806016 }, { "epoch": 0.77, "learning_rate": 0.00023655913978494624, "loss": 0.0663, "theoretical_loss": 3.3940991440087984, "tokens_seen": 2527068160 }, { "epoch": 0.77, "learning_rate": 0.00023647889584336383, "loss": 0.0682, "theoretical_loss": 3.3940713568513976, "tokens_seen": 2527330304 }, { "epoch": 0.77, "learning_rate": 0.0002363986519017814, "loss": 0.0647, "theoretical_loss": 3.394043573382951, "tokens_seen": 2527592448 }, { "epoch": 0.77, "learning_rate": 0.00023631840796019903, "loss": 0.0656, "theoretical_loss": 3.394015793602585, "tokens_seen": 2527854592 }, { "epoch": 0.77, "learning_rate": 0.00023623816401861658, "loss": 0.0654, "theoretical_loss": 3.393988017509429, "tokens_seen": 2528116736 }, { "epoch": 0.77, "learning_rate": 0.0002361579200770342, "loss": 0.0665, "theoretical_loss": 3.3939602451026096, "tokens_seen": 2528378880 }, { "epoch": 0.77, "learning_rate": 0.00023607767613545178, "loss": 0.0646, "theoretical_loss": 3.393932476381257, "tokens_seen": 2528641024 }, { "epoch": 0.77, "learning_rate": 0.00023599743219386937, "loss": 0.066, "theoretical_loss": 3.3939047113445, "tokens_seen": 2528903168 }, { "epoch": 0.77, "learning_rate": 0.00023591718825228695, "loss": 0.0658, "theoretical_loss": 3.393876949991467, "tokens_seen": 2529165312 }, { "epoch": 0.77, "learning_rate": 0.00023583694431070454, "loss": 0.0658, "theoretical_loss": 3.393849192321288, "tokens_seen": 2529427456 }, { "epoch": 0.77, "objective/train/advantage_avg": 8.658033038955182e-06, "objective/train/docs_used": 920541, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.329050064086914, "objective/train/original_loss": 1.3290499448776245, "objective/train/theoretical_loss": 3.3938214383330925, "objective/train/tokens_used": 2550149600, "objective/train/value_avg": -0.00835418701171875, "objective/train/value_loss": 0.0001360275491606444, "objective/train/value_max": -2.467632293701172e-05, "objective/train/value_min": -0.390869140625, "objective/train/value_reward_corr": 0.8806241136256157, "objective/train/value_std": 0.0211029052734375, "objective/train/weight_avg": 1.000072956085205, "objective/train/weighted_lm_loss": 1.3284049034118652, "objective/train/weights_max": 1.2012343406677246, "objective/train/weights_min": 0.3818925619125366, "theoretical_loss": 3.3938214383330925, "tokens_seen": 2529689600 }, { "epoch": 0.77, "learning_rate": 0.00023575670036912215, "loss": 0.0678, "theoretical_loss": 3.3938214383330925, "tokens_seen": 2529689600 }, { "epoch": 0.77, "learning_rate": 0.00023567645642753974, "loss": 0.0664, "theoretical_loss": 3.3937936880260113, "tokens_seen": 2529951744 }, { "epoch": 0.77, "learning_rate": 0.0002355962124859573, "loss": 0.0674, "theoretical_loss": 3.3937659413991743, "tokens_seen": 2530213888 }, { "epoch": 0.77, "learning_rate": 0.0002355159685443749, "loss": 0.066, "theoretical_loss": 3.3937381984517123, "tokens_seen": 2530476032 }, { "epoch": 0.77, "learning_rate": 0.0002354357246027925, "loss": 0.0634, "theoretical_loss": 3.3937104591827563, "tokens_seen": 2530738176 }, { "epoch": 0.77, "learning_rate": 0.0002353554806612101, "loss": 0.0658, "theoretical_loss": 3.3936827235914375, "tokens_seen": 2531000320 }, { "epoch": 0.77, "learning_rate": 0.00023527523671962766, "loss": 0.0665, "theoretical_loss": 3.3936549916768874, "tokens_seen": 2531262464 }, { "epoch": 0.77, "learning_rate": 0.00023519499277804528, "loss": 0.0683, "theoretical_loss": 3.3936272634382387, "tokens_seen": 2531524608 }, { "epoch": 0.77, "learning_rate": 0.00023511474883646286, "loss": 0.0664, "theoretical_loss": 3.3935995388746227, "tokens_seen": 2531786752 }, { "epoch": 0.77, "learning_rate": 0.00023503450489488042, "loss": 0.0634, "theoretical_loss": 3.393571817985172, "tokens_seen": 2532048896 }, { "epoch": 0.77, "learning_rate": 0.00023495426095329803, "loss": 0.0674, "theoretical_loss": 3.3935441007690197, "tokens_seen": 2532311040 }, { "epoch": 0.77, "learning_rate": 0.00023487401701171562, "loss": 0.0655, "theoretical_loss": 3.393516387225299, "tokens_seen": 2532573184 }, { "epoch": 0.77, "learning_rate": 0.00023479377307013323, "loss": 0.0654, "theoretical_loss": 3.393488677353142, "tokens_seen": 2532835328 }, { "epoch": 0.77, "learning_rate": 0.0002347135291285508, "loss": 0.0655, "theoretical_loss": 3.3934609711516845, "tokens_seen": 2533097472 }, { "epoch": 0.77, "learning_rate": 0.00023463328518696838, "loss": 0.0639, "theoretical_loss": 3.3934332686200586, "tokens_seen": 2533359616 }, { "epoch": 0.77, "learning_rate": 0.000234553041245386, "loss": 0.0665, "theoretical_loss": 3.3934055697573995, "tokens_seen": 2533621760 }, { "epoch": 0.77, "learning_rate": 0.00023447279730380357, "loss": 0.0664, "theoretical_loss": 3.393377874562841, "tokens_seen": 2533883904 }, { "epoch": 0.77, "learning_rate": 0.00023439255336222116, "loss": 0.0656, "theoretical_loss": 3.393350183035519, "tokens_seen": 2534146048 }, { "epoch": 0.77, "learning_rate": 0.00023431230942063874, "loss": 0.0644, "theoretical_loss": 3.3933224951745675, "tokens_seen": 2534408192 }, { "epoch": 0.77, "learning_rate": 0.00023423206547905636, "loss": 0.0656, "theoretical_loss": 3.3932948109791226, "tokens_seen": 2534670336 }, { "epoch": 0.77, "learning_rate": 0.00023415182153747392, "loss": 0.0667, "theoretical_loss": 3.39326713044832, "tokens_seen": 2534932480 }, { "epoch": 0.77, "learning_rate": 0.0002340715775958915, "loss": 0.0668, "theoretical_loss": 3.393239453581295, "tokens_seen": 2535194624 }, { "epoch": 0.77, "learning_rate": 0.0002339913336543091, "loss": 0.0671, "theoretical_loss": 3.393211780377185, "tokens_seen": 2535456768 }, { "epoch": 0.77, "learning_rate": 0.0002339110897127267, "loss": 0.0645, "theoretical_loss": 3.3931841108351257, "tokens_seen": 2535718912 }, { "epoch": 0.77, "learning_rate": 0.00023383084577114428, "loss": 0.0646, "theoretical_loss": 3.393156444954255, "tokens_seen": 2535981056 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.0009138497989624739, "objective/train/docs_used": 922895, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4049909114837646, "objective/train/original_loss": 1.4049906730651855, "objective/train/theoretical_loss": 3.393128782733709, "objective/train/tokens_used": 2556703200, "objective/train/value_avg": -0.006580352783203125, "objective/train/value_loss": 0.00024076194677036256, "objective/train/value_max": -2.5272369384765625e-05, "objective/train/value_min": -0.70556640625, "objective/train/value_reward_corr": 0.7010251590697951, "objective/train/value_std": 0.01537322998046875, "objective/train/weight_avg": 1.0010218620300293, "objective/train/weighted_lm_loss": 1.406140923500061, "objective/train/weights_max": 1.4373383522033691, "objective/train/weights_min": 0.36942780017852783, "theoretical_loss": 3.393128782733709, "tokens_seen": 2536243200 }, { "epoch": 0.77, "learning_rate": 0.00023375060182956187, "loss": 0.0671, "theoretical_loss": 3.393128782733709, "tokens_seen": 2536243200 }, { "epoch": 0.77, "learning_rate": 0.00023367035788797946, "loss": 0.0676, "theoretical_loss": 3.3931011241726248, "tokens_seen": 2536505344 }, { "epoch": 0.77, "learning_rate": 0.00023359011394639707, "loss": 0.0642, "theoretical_loss": 3.393073469270142, "tokens_seen": 2536767488 }, { "epoch": 0.77, "learning_rate": 0.00023350987000481463, "loss": 0.0652, "theoretical_loss": 3.393045818025397, "tokens_seen": 2537029632 }, { "epoch": 0.77, "learning_rate": 0.00023342962606323224, "loss": 0.0663, "theoretical_loss": 3.393018170437529, "tokens_seen": 2537291776 }, { "epoch": 0.77, "learning_rate": 0.00023334938212164982, "loss": 0.067, "theoretical_loss": 3.392990526505676, "tokens_seen": 2537553920 }, { "epoch": 0.77, "learning_rate": 0.0002332691381800674, "loss": 0.0658, "theoretical_loss": 3.3929628862289776, "tokens_seen": 2537816064 }, { "epoch": 0.77, "learning_rate": 0.000233188894238485, "loss": 0.0661, "theoretical_loss": 3.392935249606573, "tokens_seen": 2538078208 }, { "epoch": 0.77, "learning_rate": 0.00023310865029690258, "loss": 0.0674, "theoretical_loss": 3.3929076166376007, "tokens_seen": 2538340352 }, { "epoch": 0.77, "learning_rate": 0.0002330284063553202, "loss": 0.0633, "theoretical_loss": 3.3928799873212014, "tokens_seen": 2538602496 }, { "epoch": 0.77, "learning_rate": 0.00023294816241373775, "loss": 0.0651, "theoretical_loss": 3.3928523616565154, "tokens_seen": 2538864640 }, { "epoch": 0.77, "learning_rate": 0.00023286791847215536, "loss": 0.0637, "theoretical_loss": 3.3928247396426827, "tokens_seen": 2539126784 }, { "epoch": 0.77, "learning_rate": 0.00023278767453057295, "loss": 0.063, "theoretical_loss": 3.3927971212788437, "tokens_seen": 2539388928 }, { "epoch": 0.77, "learning_rate": 0.00023270743058899054, "loss": 0.0658, "theoretical_loss": 3.3927695065641394, "tokens_seen": 2539651072 }, { "epoch": 0.77, "learning_rate": 0.00023262718664740812, "loss": 0.0644, "theoretical_loss": 3.392741895497712, "tokens_seen": 2539913216 }, { "epoch": 0.77, "learning_rate": 0.0002325469427058257, "loss": 0.0628, "theoretical_loss": 3.392714288078702, "tokens_seen": 2540175360 }, { "epoch": 0.77, "learning_rate": 0.00023246669876424332, "loss": 0.0648, "theoretical_loss": 3.3926866843062515, "tokens_seen": 2540437504 }, { "epoch": 0.77, "learning_rate": 0.00023238645482266088, "loss": 0.0642, "theoretical_loss": 3.392659084179503, "tokens_seen": 2540699648 }, { "epoch": 0.77, "learning_rate": 0.0002323062108810785, "loss": 0.0632, "theoretical_loss": 3.3926314876975985, "tokens_seen": 2540961792 }, { "epoch": 0.77, "learning_rate": 0.00023222596693949608, "loss": 0.0643, "theoretical_loss": 3.3926038948596813, "tokens_seen": 2541223936 }, { "epoch": 0.77, "learning_rate": 0.00023214572299791366, "loss": 0.0649, "theoretical_loss": 3.3925763056648934, "tokens_seen": 2541486080 }, { "epoch": 0.77, "learning_rate": 0.00023206547905633125, "loss": 0.0649, "theoretical_loss": 3.3925487201123787, "tokens_seen": 2541748224 }, { "epoch": 0.77, "learning_rate": 0.00023198523511474883, "loss": 0.0671, "theoretical_loss": 3.392521138201281, "tokens_seen": 2542010368 }, { "epoch": 0.77, "learning_rate": 0.00023190499117316645, "loss": 0.0651, "theoretical_loss": 3.392493559930744, "tokens_seen": 2542272512 }, { "epoch": 0.77, "learning_rate": 0.00023182474723158403, "loss": 0.0668, "theoretical_loss": 3.3924659852999115, "tokens_seen": 2542534656 }, { "epoch": 0.77, "objective/train/advantage_avg": -0.00013724910968448967, "objective/train/docs_used": 925229, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.207066535949707, "objective/train/original_loss": 1.207066535949707, "objective/train/theoretical_loss": 3.3924384143079283, "objective/train/tokens_used": 2563256800, "objective/train/value_avg": -0.0070953369140625, "objective/train/value_loss": 0.00013547865091823041, "objective/train/value_max": -3.594160079956055e-05, "objective/train/value_min": -0.2379150390625, "objective/train/value_reward_corr": 0.7183617630226486, "objective/train/value_std": 0.0111541748046875, "objective/train/weight_avg": 0.9999263286590576, "objective/train/weighted_lm_loss": 1.2072526216506958, "objective/train/weights_max": 1.1096680164337158, "objective/train/weights_min": 0.3745289444923401, "theoretical_loss": 3.3924384143079283, "tokens_seen": 2542796800 }, { "epoch": 0.77, "learning_rate": 0.0002317445032900016, "loss": 0.0647, "theoretical_loss": 3.3924384143079283, "tokens_seen": 2542796800 }, { "epoch": 0.77, "learning_rate": 0.0002316642593484192, "loss": 0.0685, "theoretical_loss": 3.392410846953939, "tokens_seen": 2543058944 }, { "epoch": 0.77, "learning_rate": 0.0002315840154068368, "loss": 0.0663, "theoretical_loss": 3.3923832832370886, "tokens_seen": 2543321088 }, { "epoch": 0.77, "learning_rate": 0.00023150377146525437, "loss": 0.0672, "theoretical_loss": 3.392355723156523, "tokens_seen": 2543583232 }, { "epoch": 0.77, "learning_rate": 0.00023142352752367196, "loss": 0.0663, "theoretical_loss": 3.392328166711387, "tokens_seen": 2543845376 }, { "epoch": 0.77, "learning_rate": 0.00023134328358208957, "loss": 0.0652, "theoretical_loss": 3.3923006139008267, "tokens_seen": 2544107520 }, { "epoch": 0.77, "learning_rate": 0.00023126303964050716, "loss": 0.0642, "theoretical_loss": 3.3922730647239887, "tokens_seen": 2544369664 }, { "epoch": 0.77, "learning_rate": 0.00023118279569892471, "loss": 0.0633, "theoretical_loss": 3.392245519180019, "tokens_seen": 2544631808 }, { "epoch": 0.77, "learning_rate": 0.00023110255175734233, "loss": 0.0643, "theoretical_loss": 3.3922179772680643, "tokens_seen": 2544893952 }, { "epoch": 0.77, "learning_rate": 0.0002310223078157599, "loss": 0.0658, "theoretical_loss": 3.392190438987272, "tokens_seen": 2545156096 }, { "epoch": 0.77, "learning_rate": 0.00023094206387417753, "loss": 0.0667, "theoretical_loss": 3.39216290433679, "tokens_seen": 2545418240 }, { "epoch": 0.77, "learning_rate": 0.00023086181993259508, "loss": 0.0642, "theoretical_loss": 3.392135373315764, "tokens_seen": 2545680384 }, { "epoch": 0.77, "learning_rate": 0.00023078157599101267, "loss": 0.0636, "theoretical_loss": 3.392107845923344, "tokens_seen": 2545942528 }, { "epoch": 0.77, "learning_rate": 0.00023070133204943028, "loss": 0.0638, "theoretical_loss": 3.3920803221586775, "tokens_seen": 2546204672 }, { "epoch": 0.77, "learning_rate": 0.00023062108810784784, "loss": 0.0637, "theoretical_loss": 3.3920528020209124, "tokens_seen": 2546466816 }, { "epoch": 0.77, "learning_rate": 0.00023054084416626545, "loss": 0.0616, "theoretical_loss": 3.392025285509198, "tokens_seen": 2546728960 }, { "epoch": 0.77, "learning_rate": 0.00023046060022468304, "loss": 0.0633, "theoretical_loss": 3.391997772622683, "tokens_seen": 2546991104 }, { "epoch": 0.77, "learning_rate": 0.00023038035628310065, "loss": 0.062, "theoretical_loss": 3.3919702633605175, "tokens_seen": 2547253248 }, { "epoch": 0.77, "learning_rate": 0.0002303001123415182, "loss": 0.0619, "theoretical_loss": 3.391942757721851, "tokens_seen": 2547515392 }, { "epoch": 0.77, "learning_rate": 0.0002302198683999358, "loss": 0.065, "theoretical_loss": 3.3919152557058325, "tokens_seen": 2547777536 }, { "epoch": 0.77, "learning_rate": 0.0002301396244583534, "loss": 0.0637, "theoretical_loss": 3.3918877573116126, "tokens_seen": 2548039680 }, { "epoch": 0.77, "learning_rate": 0.000230059380516771, "loss": 0.066, "theoretical_loss": 3.3918602625383425, "tokens_seen": 2548301824 }, { "epoch": 0.77, "learning_rate": 0.00022997913657518858, "loss": 0.0646, "theoretical_loss": 3.391832771385172, "tokens_seen": 2548563968 }, { "epoch": 0.77, "learning_rate": 0.00022989889263360616, "loss": 0.0634, "theoretical_loss": 3.3918052838512533, "tokens_seen": 2548826112 }, { "epoch": 0.77, "learning_rate": 0.00022981864869202375, "loss": 0.0642, "theoretical_loss": 3.3917777999357366, "tokens_seen": 2549088256 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.0002214000269304961, "objective/train/docs_used": 927515, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.329919695854187, "objective/train/original_loss": 1.3299193382263184, "objective/train/theoretical_loss": 3.3917503196377745, "objective/train/tokens_used": 2569810400, "objective/train/value_avg": -0.008148193359375, "objective/train/value_loss": 0.00020550192857626826, "objective/train/value_max": -2.2292137145996094e-05, "objective/train/value_min": -0.71875, "objective/train/value_reward_corr": 0.7468589864842972, "objective/train/value_std": 0.01580810546875, "objective/train/weight_avg": 1.0003150701522827, "objective/train/weighted_lm_loss": 1.3300237655639648, "objective/train/weights_max": 1.4661985635757446, "objective/train/weights_min": 0.37047773599624634, "theoretical_loss": 3.3917503196377745, "tokens_seen": 2549350400 }, { "epoch": 0.77, "learning_rate": 0.00022973840475044136, "loss": 0.066, "theoretical_loss": 3.3917503196377745, "tokens_seen": 2549350400 }, { "epoch": 0.77, "learning_rate": 0.00022965816080885892, "loss": 0.0647, "theoretical_loss": 3.391722842956518, "tokens_seen": 2549612544 }, { "epoch": 0.77, "learning_rate": 0.00022957791686727653, "loss": 0.0627, "theoretical_loss": 3.3916953698911203, "tokens_seen": 2549874688 }, { "epoch": 0.77, "learning_rate": 0.00022949767292569412, "loss": 0.064, "theoretical_loss": 3.391667900440733, "tokens_seen": 2550136832 }, { "epoch": 0.77, "learning_rate": 0.0002294174289841117, "loss": 0.0646, "theoretical_loss": 3.3916404346045095, "tokens_seen": 2550398976 }, { "epoch": 0.77, "learning_rate": 0.0002293371850425293, "loss": 0.0642, "theoretical_loss": 3.3916129723816026, "tokens_seen": 2550661120 }, { "epoch": 0.77, "learning_rate": 0.00022925694110094688, "loss": 0.0654, "theoretical_loss": 3.3915855137711657, "tokens_seen": 2550923264 }, { "epoch": 0.77, "learning_rate": 0.0002291766971593645, "loss": 0.0618, "theoretical_loss": 3.3915580587723526, "tokens_seen": 2551185408 }, { "epoch": 0.77, "learning_rate": 0.00022909645321778205, "loss": 0.0622, "theoretical_loss": 3.391530607384317, "tokens_seen": 2551447552 }, { "epoch": 0.77, "learning_rate": 0.00022901620927619966, "loss": 0.0632, "theoretical_loss": 3.3915031596062133, "tokens_seen": 2551709696 }, { "epoch": 0.77, "learning_rate": 0.00022893596533461724, "loss": 0.0634, "theoretical_loss": 3.391475715437196, "tokens_seen": 2551971840 }, { "epoch": 0.77, "learning_rate": 0.00022885572139303486, "loss": 0.0623, "theoretical_loss": 3.39144827487642, "tokens_seen": 2552233984 }, { "epoch": 0.77, "learning_rate": 0.00022877547745145242, "loss": 0.0657, "theoretical_loss": 3.3914208379230395, "tokens_seen": 2552496128 }, { "epoch": 0.77, "learning_rate": 0.00022869523350987, "loss": 0.0656, "theoretical_loss": 3.3913934045762106, "tokens_seen": 2552758272 }, { "epoch": 0.77, "learning_rate": 0.00022861498956828761, "loss": 0.0624, "theoretical_loss": 3.3913659748350895, "tokens_seen": 2553020416 }, { "epoch": 0.77, "learning_rate": 0.00022853474562670517, "loss": 0.066, "theoretical_loss": 3.391338548698831, "tokens_seen": 2553282560 }, { "epoch": 0.77, "learning_rate": 0.00022845450168512278, "loss": 0.0644, "theoretical_loss": 3.391311126166592, "tokens_seen": 2553544704 }, { "epoch": 0.77, "learning_rate": 0.00022837425774354037, "loss": 0.0624, "theoretical_loss": 3.3912837072375286, "tokens_seen": 2553806848 }, { "epoch": 0.77, "learning_rate": 0.00022829401380195796, "loss": 0.0648, "theoretical_loss": 3.391256291910798, "tokens_seen": 2554068992 }, { "epoch": 0.77, "learning_rate": 0.00022821376986037554, "loss": 0.064, "theoretical_loss": 3.391228880185557, "tokens_seen": 2554331136 }, { "epoch": 0.77, "learning_rate": 0.00022813352591879313, "loss": 0.0625, "theoretical_loss": 3.3912014720609625, "tokens_seen": 2554593280 }, { "epoch": 0.77, "learning_rate": 0.00022805328197721074, "loss": 0.0657, "theoretical_loss": 3.391174067536173, "tokens_seen": 2554855424 }, { "epoch": 0.77, "learning_rate": 0.00022797303803562832, "loss": 0.0665, "theoretical_loss": 3.391146666610346, "tokens_seen": 2555117568 }, { "epoch": 0.77, "learning_rate": 0.00022789279409404588, "loss": 0.0665, "theoretical_loss": 3.3911192692826395, "tokens_seen": 2555379712 }, { "epoch": 0.77, "learning_rate": 0.0002278125501524635, "loss": 0.0647, "theoretical_loss": 3.3910918755522124, "tokens_seen": 2555641856 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.0013836933067068458, "objective/train/docs_used": 929757, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2935469150543213, "objective/train/original_loss": 1.2935466766357422, "objective/train/theoretical_loss": 3.391064485418223, "objective/train/tokens_used": 2576364000, "objective/train/value_avg": -0.0069427490234375, "objective/train/value_loss": 0.00013478376786224544, "objective/train/value_max": -2.8431415557861328e-05, "objective/train/value_min": -0.6962890625, "objective/train/value_reward_corr": 0.769539782212816, "objective/train/value_std": 0.01454925537109375, "objective/train/weight_avg": 1.0014495849609375, "objective/train/weighted_lm_loss": 1.295472502708435, "objective/train/weights_max": 1.4873151779174805, "objective/train/weights_min": 0.6114416718482971, "theoretical_loss": 3.391064485418223, "tokens_seen": 2555904000 }, { "epoch": 0.77, "learning_rate": 0.00022773230621088108, "loss": 0.0645, "theoretical_loss": 3.391064485418223, "tokens_seen": 2555904000 }, { "epoch": 0.77, "learning_rate": 0.00022765206226929867, "loss": 0.0667, "theoretical_loss": 3.3910370988798304, "tokens_seen": 2556166144 }, { "epoch": 0.77, "learning_rate": 0.00022757181832771625, "loss": 0.0667, "theoretical_loss": 3.3910097159361943, "tokens_seen": 2556428288 }, { "epoch": 0.77, "learning_rate": 0.00022749157438613386, "loss": 0.0634, "theoretical_loss": 3.3909823365864744, "tokens_seen": 2556690432 }, { "epoch": 0.77, "learning_rate": 0.00022741133044455145, "loss": 0.0646, "theoretical_loss": 3.3909549608298297, "tokens_seen": 2556952576 }, { "epoch": 0.77, "learning_rate": 0.000227331086502969, "loss": 0.0647, "theoretical_loss": 3.3909275886654213, "tokens_seen": 2557214720 }, { "epoch": 0.78, "learning_rate": 0.00022725084256138662, "loss": 0.0653, "theoretical_loss": 3.3909002200924094, "tokens_seen": 2557476864 }, { "epoch": 0.78, "learning_rate": 0.0002271705986198042, "loss": 0.0655, "theoretical_loss": 3.3908728551099543, "tokens_seen": 2557739008 }, { "epoch": 0.78, "learning_rate": 0.00022709035467822182, "loss": 0.0656, "theoretical_loss": 3.390845493717218, "tokens_seen": 2558001152 }, { "epoch": 0.78, "learning_rate": 0.00022701011073663938, "loss": 0.0671, "theoretical_loss": 3.3908181359133605, "tokens_seen": 2558263296 }, { "epoch": 0.78, "learning_rate": 0.000226929866795057, "loss": 0.0642, "theoretical_loss": 3.390790781697544, "tokens_seen": 2558525440 }, { "epoch": 0.78, "learning_rate": 0.00022684962285347458, "loss": 0.0666, "theoretical_loss": 3.390763431068931, "tokens_seen": 2558787584 }, { "epoch": 0.78, "learning_rate": 0.00022676937891189213, "loss": 0.0641, "theoretical_loss": 3.390736084026683, "tokens_seen": 2559049728 }, { "epoch": 0.78, "learning_rate": 0.00022668913497030975, "loss": 0.0609, "theoretical_loss": 3.390708740569962, "tokens_seen": 2559311872 }, { "epoch": 0.78, "learning_rate": 0.00022660889102872733, "loss": 0.0664, "theoretical_loss": 3.3906814006979316, "tokens_seen": 2559574016 }, { "epoch": 0.78, "learning_rate": 0.00022652864708714495, "loss": 0.0641, "theoretical_loss": 3.390654064409754, "tokens_seen": 2559836160 }, { "epoch": 0.78, "learning_rate": 0.0002264484031455625, "loss": 0.0601, "theoretical_loss": 3.3906267317045935, "tokens_seen": 2560098304 }, { "epoch": 0.78, "learning_rate": 0.0002263681592039801, "loss": 0.0639, "theoretical_loss": 3.3905994025816124, "tokens_seen": 2560360448 }, { "epoch": 0.78, "learning_rate": 0.0002262879152623977, "loss": 0.0644, "theoretical_loss": 3.3905720770399754, "tokens_seen": 2560622592 }, { "epoch": 0.78, "learning_rate": 0.0002262076713208153, "loss": 0.0616, "theoretical_loss": 3.3905447550788463, "tokens_seen": 2560884736 }, { "epoch": 0.78, "learning_rate": 0.00022612742737923287, "loss": 0.0637, "theoretical_loss": 3.39051743669739, "tokens_seen": 2561146880 }, { "epoch": 0.78, "learning_rate": 0.00022604718343765046, "loss": 0.0638, "theoretical_loss": 3.39049012189477, "tokens_seen": 2561409024 }, { "epoch": 0.78, "learning_rate": 0.00022596693949606807, "loss": 0.0627, "theoretical_loss": 3.3904628106701526, "tokens_seen": 2561671168 }, { "epoch": 0.78, "learning_rate": 0.00022588669555448563, "loss": 0.066, "theoretical_loss": 3.3904355030227022, "tokens_seen": 2561933312 }, { "epoch": 0.78, "learning_rate": 0.00022580645161290321, "loss": 0.0644, "theoretical_loss": 3.3904081989515844, "tokens_seen": 2562195456 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.00031394496909342706, "objective/train/docs_used": 931999, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3014920949935913, "objective/train/original_loss": 1.3014919757843018, "objective/train/theoretical_loss": 3.390380898455965, "objective/train/tokens_used": 2582917600, "objective/train/value_avg": -0.005458831787109375, "objective/train/value_loss": 0.00011215580889256671, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.265380859375, "objective/train/value_reward_corr": 0.7369466557109846, "objective/train/value_std": 0.01082611083984375, "objective/train/weight_avg": 1.0003670454025269, "objective/train/weighted_lm_loss": 1.30169677734375, "objective/train/weights_max": 1.1378111839294434, "objective/train/weights_min": 0.3843361437320709, "theoretical_loss": 3.390380898455965, "tokens_seen": 2562457600 }, { "epoch": 0.78, "learning_rate": 0.00022572620767132083, "loss": 0.0631, "theoretical_loss": 3.390380898455965, "tokens_seen": 2562457600 }, { "epoch": 0.78, "learning_rate": 0.0002256459637297384, "loss": 0.0665, "theoretical_loss": 3.390353601535011, "tokens_seen": 2562719744 }, { "epoch": 0.78, "learning_rate": 0.000225565719788156, "loss": 0.0643, "theoretical_loss": 3.3903263081878876, "tokens_seen": 2562981888 }, { "epoch": 0.78, "learning_rate": 0.00022548547584657358, "loss": 0.0635, "theoretical_loss": 3.390299018413762, "tokens_seen": 2563244032 }, { "epoch": 0.78, "learning_rate": 0.00022540523190499117, "loss": 0.0636, "theoretical_loss": 3.3902717322118, "tokens_seen": 2563506176 }, { "epoch": 0.78, "learning_rate": 0.00022532498796340878, "loss": 0.0659, "theoretical_loss": 3.3902444495811705, "tokens_seen": 2563768320 }, { "epoch": 0.78, "learning_rate": 0.00022524474402182634, "loss": 0.0649, "theoretical_loss": 3.3902171705210398, "tokens_seen": 2564030464 }, { "epoch": 0.78, "learning_rate": 0.00022516450008024395, "loss": 0.0626, "theoretical_loss": 3.390189895030576, "tokens_seen": 2564292608 }, { "epoch": 0.78, "learning_rate": 0.00022508425613866154, "loss": 0.0635, "theoretical_loss": 3.390162623108948, "tokens_seen": 2564554752 }, { "epoch": 0.78, "learning_rate": 0.00022500401219707915, "loss": 0.0651, "theoretical_loss": 3.390135354755323, "tokens_seen": 2564816896 }, { "epoch": 0.78, "learning_rate": 0.0002249237682554967, "loss": 0.0644, "theoretical_loss": 3.3901080899688694, "tokens_seen": 2565079040 }, { "epoch": 0.78, "learning_rate": 0.0002248435243139143, "loss": 0.0639, "theoretical_loss": 3.390080828748757, "tokens_seen": 2565341184 }, { "epoch": 0.78, "learning_rate": 0.0002247632803723319, "loss": 0.0642, "theoretical_loss": 3.390053571094154, "tokens_seen": 2565603328 }, { "epoch": 0.78, "learning_rate": 0.00022468303643074947, "loss": 0.0654, "theoretical_loss": 3.3900263170042306, "tokens_seen": 2565865472 }, { "epoch": 0.78, "learning_rate": 0.00022460279248916708, "loss": 0.0634, "theoretical_loss": 3.3899990664781563, "tokens_seen": 2566127616 }, { "epoch": 0.78, "learning_rate": 0.00022452254854758466, "loss": 0.066, "theoretical_loss": 3.389971819515101, "tokens_seen": 2566389760 }, { "epoch": 0.78, "learning_rate": 0.00022444230460600225, "loss": 0.0662, "theoretical_loss": 3.389944576114235, "tokens_seen": 2566651904 }, { "epoch": 0.78, "learning_rate": 0.00022436206066441984, "loss": 0.0624, "theoretical_loss": 3.3899173362747286, "tokens_seen": 2566914048 }, { "epoch": 0.78, "learning_rate": 0.00022428181672283742, "loss": 0.065, "theoretical_loss": 3.389890099995753, "tokens_seen": 2567176192 }, { "epoch": 0.78, "learning_rate": 0.00022420157278125503, "loss": 0.0661, "theoretical_loss": 3.389862867276479, "tokens_seen": 2567438336 }, { "epoch": 0.78, "learning_rate": 0.00022412132883967262, "loss": 0.0628, "theoretical_loss": 3.3898356381160784, "tokens_seen": 2567700480 }, { "epoch": 0.78, "learning_rate": 0.0002240410848980902, "loss": 0.0661, "theoretical_loss": 3.3898084125137222, "tokens_seen": 2567962624 }, { "epoch": 0.78, "learning_rate": 0.0002239608409565078, "loss": 0.0644, "theoretical_loss": 3.389781190468583, "tokens_seen": 2568224768 }, { "epoch": 0.78, "learning_rate": 0.00022388059701492538, "loss": 0.0651, "theoretical_loss": 3.3897539719798324, "tokens_seen": 2568486912 }, { "epoch": 0.78, "learning_rate": 0.00022380035307334296, "loss": 0.0657, "theoretical_loss": 3.389726757046643, "tokens_seen": 2568749056 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.0011137585388496518, "objective/train/docs_used": 934338, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2182008028030396, "objective/train/original_loss": 1.2182008028030396, "objective/train/theoretical_loss": 3.3896995456681878, "objective/train/tokens_used": 2589471200, "objective/train/value_avg": -0.0082244873046875, "objective/train/value_loss": 0.00020294927526265383, "objective/train/value_max": -1.9550323486328125e-05, "objective/train/value_min": -0.254150390625, "objective/train/value_reward_corr": 0.7417535635411541, "objective/train/value_std": 0.015960693359375, "objective/train/weight_avg": 1.0012061595916748, "objective/train/weighted_lm_loss": 1.2197353839874268, "objective/train/weights_max": 1.1746975183486938, "objective/train/weights_min": 0.3702559173107147, "theoretical_loss": 3.3896995456681878, "tokens_seen": 2569011200 }, { "epoch": 0.78, "learning_rate": 0.00022372010913176055, "loss": 0.0633, "theoretical_loss": 3.3896995456681878, "tokens_seen": 2569011200 }, { "epoch": 0.78, "learning_rate": 0.00022363986519017816, "loss": 0.0691, "theoretical_loss": 3.38967233784364, "tokens_seen": 2569273344 }, { "epoch": 0.78, "learning_rate": 0.00022355962124859574, "loss": 0.0635, "theoretical_loss": 3.3896451335721727, "tokens_seen": 2569535488 }, { "epoch": 0.78, "learning_rate": 0.0002234793773070133, "loss": 0.0645, "theoretical_loss": 3.3896179328529588, "tokens_seen": 2569797632 }, { "epoch": 0.78, "learning_rate": 0.00022339913336543092, "loss": 0.0665, "theoretical_loss": 3.3895907356851733, "tokens_seen": 2570059776 }, { "epoch": 0.78, "learning_rate": 0.0002233188894238485, "loss": 0.065, "theoretical_loss": 3.38956354206799, "tokens_seen": 2570321920 }, { "epoch": 0.78, "learning_rate": 0.00022323864548226611, "loss": 0.0667, "theoretical_loss": 3.389536352000583, "tokens_seen": 2570584064 }, { "epoch": 0.78, "learning_rate": 0.00022315840154068367, "loss": 0.0686, "theoretical_loss": 3.389509165482127, "tokens_seen": 2570846208 }, { "epoch": 0.78, "learning_rate": 0.00022307815759910128, "loss": 0.0664, "theoretical_loss": 3.389481982511797, "tokens_seen": 2571108352 }, { "epoch": 0.78, "learning_rate": 0.00022299791365751887, "loss": 0.067, "theoretical_loss": 3.3894548030887686, "tokens_seen": 2571370496 }, { "epoch": 0.78, "learning_rate": 0.00022291766971593643, "loss": 0.0645, "theoretical_loss": 3.3894276272122172, "tokens_seen": 2571632640 }, { "epoch": 0.78, "learning_rate": 0.00022283742577435404, "loss": 0.0641, "theoretical_loss": 3.389400454881318, "tokens_seen": 2571894784 }, { "epoch": 0.78, "learning_rate": 0.00022275718183277163, "loss": 0.067, "theoretical_loss": 3.389373286095248, "tokens_seen": 2572156928 }, { "epoch": 0.78, "learning_rate": 0.00022267693789118924, "loss": 0.0708, "theoretical_loss": 3.389346120853183, "tokens_seen": 2572419072 }, { "epoch": 0.78, "learning_rate": 0.0002225966939496068, "loss": 0.0681, "theoretical_loss": 3.3893189591543, "tokens_seen": 2572681216 }, { "epoch": 0.78, "learning_rate": 0.00022251645000802438, "loss": 0.0686, "theoretical_loss": 3.3892918009977753, "tokens_seen": 2572943360 }, { "epoch": 0.78, "learning_rate": 0.000222436206066442, "loss": 0.0642, "theoretical_loss": 3.3892646463827862, "tokens_seen": 2573205504 }, { "epoch": 0.78, "learning_rate": 0.00022235596212485958, "loss": 0.0636, "theoretical_loss": 3.3892374953085107, "tokens_seen": 2573467648 }, { "epoch": 0.78, "learning_rate": 0.00022227571818327717, "loss": 0.0703, "theoretical_loss": 3.389210347774126, "tokens_seen": 2573729792 }, { "epoch": 0.78, "learning_rate": 0.00022219547424169475, "loss": 0.0646, "theoretical_loss": 3.3891832037788103, "tokens_seen": 2573991936 }, { "epoch": 0.78, "learning_rate": 0.00022211523030011236, "loss": 0.0631, "theoretical_loss": 3.389156063321742, "tokens_seen": 2574254080 }, { "epoch": 0.78, "learning_rate": 0.00022203498635852992, "loss": 0.065, "theoretical_loss": 3.3891289264020994, "tokens_seen": 2574516224 }, { "epoch": 0.78, "learning_rate": 0.0002219547424169475, "loss": 0.0661, "theoretical_loss": 3.3891017930190612, "tokens_seen": 2574778368 }, { "epoch": 0.78, "learning_rate": 0.00022187449847536512, "loss": 0.0673, "theoretical_loss": 3.3890746631718067, "tokens_seen": 2575040512 }, { "epoch": 0.78, "learning_rate": 0.0002217942545337827, "loss": 0.0612, "theoretical_loss": 3.3890475368595157, "tokens_seen": 2575302656 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.0015231725992634892, "objective/train/docs_used": 936685, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2750457525253296, "objective/train/original_loss": 1.2750457525253296, "objective/train/theoretical_loss": 3.389020414081367, "objective/train/tokens_used": 2596024800, "objective/train/value_avg": -0.008087158203125, "objective/train/value_loss": 0.00017865085101220757, "objective/train/value_max": -2.86102294921875e-05, "objective/train/value_min": -0.2646484375, "objective/train/value_reward_corr": 0.6448530001084585, "objective/train/value_std": 0.012664794921875, "objective/train/weight_avg": 1.0016039609909058, "objective/train/weighted_lm_loss": 1.2768608331680298, "objective/train/weights_max": 1.1373058557510376, "objective/train/weights_min": 0.368586003780365, "theoretical_loss": 3.389020414081367, "tokens_seen": 2575564800 }, { "epoch": 0.78, "learning_rate": 0.0002217140105922003, "loss": 0.0666, "theoretical_loss": 3.389020414081367, "tokens_seen": 2575564800 }, { "epoch": 0.78, "learning_rate": 0.00022163376665061788, "loss": 0.0661, "theoretical_loss": 3.3889932948365407, "tokens_seen": 2575826944 }, { "epoch": 0.78, "learning_rate": 0.00022155352270903546, "loss": 0.0631, "theoretical_loss": 3.3889661791242176, "tokens_seen": 2576089088 }, { "epoch": 0.78, "learning_rate": 0.00022147327876745308, "loss": 0.0678, "theoretical_loss": 3.3889390669435775, "tokens_seen": 2576351232 }, { "epoch": 0.78, "learning_rate": 0.00022139303482587063, "loss": 0.0682, "theoretical_loss": 3.388911958293802, "tokens_seen": 2576613376 }, { "epoch": 0.78, "learning_rate": 0.00022131279088428825, "loss": 0.0616, "theoretical_loss": 3.388884853174071, "tokens_seen": 2576875520 }, { "epoch": 0.78, "learning_rate": 0.00022123254694270583, "loss": 0.0687, "theoretical_loss": 3.3888577515835663, "tokens_seen": 2577137664 }, { "epoch": 0.78, "learning_rate": 0.00022115230300112342, "loss": 0.0615, "theoretical_loss": 3.3888306535214694, "tokens_seen": 2577399808 }, { "epoch": 0.78, "learning_rate": 0.000221072059059541, "loss": 0.0656, "theoretical_loss": 3.3888035589869627, "tokens_seen": 2577661952 }, { "epoch": 0.78, "learning_rate": 0.0002209918151179586, "loss": 0.0651, "theoretical_loss": 3.3887764679792274, "tokens_seen": 2577924096 }, { "epoch": 0.78, "learning_rate": 0.0002209115711763762, "loss": 0.0655, "theoretical_loss": 3.3887493804974462, "tokens_seen": 2578186240 }, { "epoch": 0.78, "learning_rate": 0.00022083132723479376, "loss": 0.0692, "theoretical_loss": 3.388722296540802, "tokens_seen": 2578448384 }, { "epoch": 0.78, "learning_rate": 0.00022075108329321137, "loss": 0.0674, "theoretical_loss": 3.3886952161084776, "tokens_seen": 2578710528 }, { "epoch": 0.78, "learning_rate": 0.00022067083935162896, "loss": 0.0651, "theoretical_loss": 3.3886681391996563, "tokens_seen": 2578972672 }, { "epoch": 0.78, "learning_rate": 0.00022059059541004654, "loss": 0.0676, "theoretical_loss": 3.3886410658135206, "tokens_seen": 2579234816 }, { "epoch": 0.78, "learning_rate": 0.00022051035146846413, "loss": 0.0681, "theoretical_loss": 3.388613995949256, "tokens_seen": 2579496960 }, { "epoch": 0.78, "learning_rate": 0.00022043010752688171, "loss": 0.0675, "theoretical_loss": 3.388586929606045, "tokens_seen": 2579759104 }, { "epoch": 0.78, "learning_rate": 0.00022034986358529933, "loss": 0.0643, "theoretical_loss": 3.3885598667830727, "tokens_seen": 2580021248 }, { "epoch": 0.78, "learning_rate": 0.0002202696196437169, "loss": 0.0619, "theoretical_loss": 3.3885328074795233, "tokens_seen": 2580283392 }, { "epoch": 0.78, "learning_rate": 0.0002201893757021345, "loss": 0.0648, "theoretical_loss": 3.3885057516945816, "tokens_seen": 2580545536 }, { "epoch": 0.78, "learning_rate": 0.00022010913176055208, "loss": 0.0674, "theoretical_loss": 3.388478699427433, "tokens_seen": 2580807680 }, { "epoch": 0.78, "learning_rate": 0.00022002888781896967, "loss": 0.0639, "theoretical_loss": 3.388451650677262, "tokens_seen": 2581069824 }, { "epoch": 0.78, "learning_rate": 0.00021994864387738725, "loss": 0.067, "theoretical_loss": 3.388424605443256, "tokens_seen": 2581331968 }, { "epoch": 0.78, "learning_rate": 0.00021986839993580484, "loss": 0.0655, "theoretical_loss": 3.3883975637245993, "tokens_seen": 2581594112 }, { "epoch": 0.78, "learning_rate": 0.00021978815599422245, "loss": 0.0639, "theoretical_loss": 3.388370525520479, "tokens_seen": 2581856256 }, { "epoch": 0.78, "objective/train/advantage_avg": -1.6538759155082516e-05, "objective/train/docs_used": 938961, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3801133632659912, "objective/train/original_loss": 1.380113124847412, "objective/train/theoretical_loss": 3.3883434908300805, "objective/train/tokens_used": 2602578400, "objective/train/value_avg": -0.00927734375, "objective/train/value_loss": 0.0004200639668852091, "objective/train/value_max": -3.4809112548828125e-05, "objective/train/value_min": -0.662109375, "objective/train/value_reward_corr": 0.7653193134682554, "objective/train/value_std": 0.021453857421875, "objective/train/weight_avg": 1.0001425743103027, "objective/train/weighted_lm_loss": 1.3799386024475098, "objective/train/weights_max": 1.3826122283935547, "objective/train/weights_min": 0.055677253752946854, "theoretical_loss": 3.3883434908300805, "tokens_seen": 2582118400 }, { "epoch": 0.78, "learning_rate": 0.00021970791205264004, "loss": 0.0653, "theoretical_loss": 3.3883434908300805, "tokens_seen": 2582118400 }, { "epoch": 0.78, "learning_rate": 0.0002196276681110576, "loss": 0.0676, "theoretical_loss": 3.388316459652591, "tokens_seen": 2582380544 }, { "epoch": 0.78, "learning_rate": 0.0002195474241694752, "loss": 0.0639, "theoretical_loss": 3.3882894319871983, "tokens_seen": 2582642688 }, { "epoch": 0.78, "learning_rate": 0.0002194671802278928, "loss": 0.0654, "theoretical_loss": 3.388262407833089, "tokens_seen": 2582904832 }, { "epoch": 0.78, "learning_rate": 0.0002193869362863104, "loss": 0.0671, "theoretical_loss": 3.3882353871894506, "tokens_seen": 2583166976 }, { "epoch": 0.78, "learning_rate": 0.00021930669234472797, "loss": 0.0657, "theoretical_loss": 3.388208370055471, "tokens_seen": 2583429120 }, { "epoch": 0.78, "learning_rate": 0.00021922644840314558, "loss": 0.065, "theoretical_loss": 3.388181356430338, "tokens_seen": 2583691264 }, { "epoch": 0.78, "learning_rate": 0.00021914620446156316, "loss": 0.0639, "theoretical_loss": 3.3881543463132404, "tokens_seen": 2583953408 }, { "epoch": 0.78, "learning_rate": 0.00021906596051998072, "loss": 0.0645, "theoretical_loss": 3.3881273397033667, "tokens_seen": 2584215552 }, { "epoch": 0.78, "learning_rate": 0.00021898571657839834, "loss": 0.0657, "theoretical_loss": 3.388100336599906, "tokens_seen": 2584477696 }, { "epoch": 0.78, "learning_rate": 0.00021890547263681592, "loss": 0.0649, "theoretical_loss": 3.388073337002047, "tokens_seen": 2584739840 }, { "epoch": 0.78, "learning_rate": 0.00021882522869523353, "loss": 0.0649, "theoretical_loss": 3.388046340908979, "tokens_seen": 2585001984 }, { "epoch": 0.78, "learning_rate": 0.0002187449847536511, "loss": 0.0653, "theoretical_loss": 3.388019348319892, "tokens_seen": 2585264128 }, { "epoch": 0.78, "learning_rate": 0.0002186647408120687, "loss": 0.0625, "theoretical_loss": 3.3879923592339765, "tokens_seen": 2585526272 }, { "epoch": 0.78, "learning_rate": 0.0002185844968704863, "loss": 0.064, "theoretical_loss": 3.3879653736504216, "tokens_seen": 2585788416 }, { "epoch": 0.78, "learning_rate": 0.00021850425292890388, "loss": 0.0625, "theoretical_loss": 3.3879383915684187, "tokens_seen": 2586050560 }, { "epoch": 0.78, "learning_rate": 0.00021842400898732146, "loss": 0.0695, "theoretical_loss": 3.387911412987158, "tokens_seen": 2586312704 }, { "epoch": 0.78, "learning_rate": 0.00021834376504573905, "loss": 0.0652, "theoretical_loss": 3.3878844379058313, "tokens_seen": 2586574848 }, { "epoch": 0.78, "learning_rate": 0.00021826352110415666, "loss": 0.0678, "theoretical_loss": 3.387857466323629, "tokens_seen": 2586836992 }, { "epoch": 0.78, "learning_rate": 0.00021818327716257422, "loss": 0.0676, "theoretical_loss": 3.3878304982397434, "tokens_seen": 2587099136 }, { "epoch": 0.78, "learning_rate": 0.0002181030332209918, "loss": 0.0647, "theoretical_loss": 3.387803533653366, "tokens_seen": 2587361280 }, { "epoch": 0.78, "learning_rate": 0.00021802278927940942, "loss": 0.0684, "theoretical_loss": 3.3877765725636886, "tokens_seen": 2587623424 }, { "epoch": 0.78, "learning_rate": 0.000217942545337827, "loss": 0.0658, "theoretical_loss": 3.3877496149699047, "tokens_seen": 2587885568 }, { "epoch": 0.78, "learning_rate": 0.0002178623013962446, "loss": 0.0639, "theoretical_loss": 3.387722660871206, "tokens_seen": 2588147712 }, { "epoch": 0.78, "learning_rate": 0.00021778205745466217, "loss": 0.0661, "theoretical_loss": 3.387695710266785, "tokens_seen": 2588409856 }, { "epoch": 0.78, "objective/train/advantage_avg": -0.00018109590746462345, "objective/train/docs_used": 941211, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2806812524795532, "objective/train/original_loss": 1.2806812524795532, "objective/train/theoretical_loss": 3.387668763155836, "objective/train/tokens_used": 2609132000, "objective/train/value_avg": -0.007171630859375, "objective/train/value_loss": 0.0002567979972809553, "objective/train/value_max": -2.7120113372802734e-05, "objective/train/value_min": -0.7080078125, "objective/train/value_reward_corr": 0.7377811245142272, "objective/train/value_std": 0.0166473388671875, "objective/train/weight_avg": 0.9999337792396545, "objective/train/weighted_lm_loss": 1.2800700664520264, "objective/train/weights_max": 1.4415401220321655, "objective/train/weights_min": 0.37006106972694397, "theoretical_loss": 3.387668763155836, "tokens_seen": 2588672000 }, { "epoch": 0.78, "learning_rate": 0.00021770181351307978, "loss": 0.0682, "theoretical_loss": 3.387668763155836, "tokens_seen": 2588672000 }, { "epoch": 0.78, "learning_rate": 0.00021762156957149737, "loss": 0.066, "theoretical_loss": 3.387641819537552, "tokens_seen": 2588934144 }, { "epoch": 0.78, "learning_rate": 0.00021754132562991493, "loss": 0.0682, "theoretical_loss": 3.3876148794111267, "tokens_seen": 2589196288 }, { "epoch": 0.78, "learning_rate": 0.00021746108168833254, "loss": 0.064, "theoretical_loss": 3.3875879427757543, "tokens_seen": 2589458432 }, { "epoch": 0.78, "learning_rate": 0.00021738083774675013, "loss": 0.0654, "theoretical_loss": 3.3875610096306286, "tokens_seen": 2589720576 }, { "epoch": 0.78, "learning_rate": 0.0002173005938051677, "loss": 0.0659, "theoretical_loss": 3.3875340799749445, "tokens_seen": 2589982720 }, { "epoch": 0.78, "learning_rate": 0.0002172203498635853, "loss": 0.0612, "theoretical_loss": 3.3875071538078965, "tokens_seen": 2590244864 }, { "epoch": 0.79, "learning_rate": 0.00021714010592200288, "loss": 0.0623, "theoretical_loss": 3.38748023112868, "tokens_seen": 2590507008 }, { "epoch": 0.79, "learning_rate": 0.0002170598619804205, "loss": 0.0641, "theoretical_loss": 3.3874533119364902, "tokens_seen": 2590769152 }, { "epoch": 0.79, "learning_rate": 0.00021697961803883805, "loss": 0.0668, "theoretical_loss": 3.3874263962305227, "tokens_seen": 2591031296 }, { "epoch": 0.79, "learning_rate": 0.00021689937409725567, "loss": 0.0633, "theoretical_loss": 3.3873994840099733, "tokens_seen": 2591293440 }, { "epoch": 0.79, "learning_rate": 0.00021681913015567325, "loss": 0.0662, "theoretical_loss": 3.3873725752740382, "tokens_seen": 2591555584 }, { "epoch": 0.79, "learning_rate": 0.00021673888621409087, "loss": 0.0638, "theoretical_loss": 3.387345670021914, "tokens_seen": 2591817728 }, { "epoch": 0.79, "learning_rate": 0.00021665864227250842, "loss": 0.0658, "theoretical_loss": 3.387318768252797, "tokens_seen": 2592079872 }, { "epoch": 0.79, "learning_rate": 0.000216578398330926, "loss": 0.0653, "theoretical_loss": 3.387291869965884, "tokens_seen": 2592342016 }, { "epoch": 0.79, "learning_rate": 0.00021649815438934362, "loss": 0.0676, "theoretical_loss": 3.3872649751603725, "tokens_seen": 2592604160 }, { "epoch": 0.79, "learning_rate": 0.00021641791044776118, "loss": 0.0662, "theoretical_loss": 3.3872380838354603, "tokens_seen": 2592866304 }, { "epoch": 0.79, "learning_rate": 0.0002163376665061788, "loss": 0.0618, "theoretical_loss": 3.3872111959903446, "tokens_seen": 2593128448 }, { "epoch": 0.79, "learning_rate": 0.00021625742256459638, "loss": 0.0632, "theoretical_loss": 3.3871843116242237, "tokens_seen": 2593390592 }, { "epoch": 0.79, "learning_rate": 0.00021617717862301396, "loss": 0.0652, "theoretical_loss": 3.3871574307362957, "tokens_seen": 2593652736 }, { "epoch": 0.79, "learning_rate": 0.00021609693468143155, "loss": 0.0653, "theoretical_loss": 3.387130553325759, "tokens_seen": 2593914880 }, { "epoch": 0.79, "learning_rate": 0.00021601669073984913, "loss": 0.0658, "theoretical_loss": 3.3871036793918123, "tokens_seen": 2594177024 }, { "epoch": 0.79, "learning_rate": 0.00021593644679826675, "loss": 0.0661, "theoretical_loss": 3.3870768089336556, "tokens_seen": 2594439168 }, { "epoch": 0.79, "learning_rate": 0.00021585620285668433, "loss": 0.0664, "theoretical_loss": 3.3870499419504867, "tokens_seen": 2594701312 }, { "epoch": 0.79, "learning_rate": 0.00021577595891510192, "loss": 0.066, "theoretical_loss": 3.3870230784415067, "tokens_seen": 2594963456 }, { "epoch": 0.79, "objective/train/advantage_avg": -0.00033047914621420205, "objective/train/docs_used": 943650, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4407196044921875, "objective/train/original_loss": 1.440719485282898, "objective/train/theoretical_loss": 3.3869962184059146, "objective/train/tokens_used": 2615685600, "objective/train/value_avg": -0.007740020751953125, "objective/train/value_loss": 0.000411410495871678, "objective/train/value_max": -3.534555435180664e-05, "objective/train/value_min": -0.86767578125, "objective/train/value_reward_corr": 0.7215062047244037, "objective/train/value_std": 0.0186920166015625, "objective/train/weight_avg": 0.9998422265052795, "objective/train/weighted_lm_loss": 1.4408422708511353, "objective/train/weights_max": 1.393456220626831, "objective/train/weights_min": 0.23286989331245422, "theoretical_loss": 3.3869962184059146, "tokens_seen": 2595225600 }, { "epoch": 0.79, "learning_rate": 0.0002156957149735195, "loss": 0.0685, "theoretical_loss": 3.3869962184059146, "tokens_seen": 2595225600 }, { "epoch": 0.79, "learning_rate": 0.0002156154710319371, "loss": 0.0694, "theoretical_loss": 3.386969361842911, "tokens_seen": 2595487744 }, { "epoch": 0.79, "learning_rate": 0.0002155352270903547, "loss": 0.0671, "theoretical_loss": 3.3869425087516953, "tokens_seen": 2595749888 }, { "epoch": 0.79, "learning_rate": 0.00021545498314877226, "loss": 0.0674, "theoretical_loss": 3.386915659131469, "tokens_seen": 2596012032 }, { "epoch": 0.79, "learning_rate": 0.00021537473920718987, "loss": 0.0681, "theoretical_loss": 3.386888812981433, "tokens_seen": 2596274176 }, { "epoch": 0.79, "learning_rate": 0.00021529449526560746, "loss": 0.0642, "theoretical_loss": 3.3868619703007883, "tokens_seen": 2596536320 }, { "epoch": 0.79, "learning_rate": 0.00021521425132402502, "loss": 0.0641, "theoretical_loss": 3.386835131088737, "tokens_seen": 2596798464 }, { "epoch": 0.79, "learning_rate": 0.00021513400738244263, "loss": 0.065, "theoretical_loss": 3.386808295344479, "tokens_seen": 2597060608 }, { "epoch": 0.79, "learning_rate": 0.00021505376344086021, "loss": 0.0656, "theoretical_loss": 3.386781463067218, "tokens_seen": 2597322752 }, { "epoch": 0.79, "learning_rate": 0.00021497351949927783, "loss": 0.0654, "theoretical_loss": 3.386754634256156, "tokens_seen": 2597584896 }, { "epoch": 0.79, "learning_rate": 0.00021489327555769539, "loss": 0.0644, "theoretical_loss": 3.3867278089104946, "tokens_seen": 2597847040 }, { "epoch": 0.79, "learning_rate": 0.000214813031616113, "loss": 0.066, "theoretical_loss": 3.3867009870294376, "tokens_seen": 2598109184 }, { "epoch": 0.79, "learning_rate": 0.00021473278767453058, "loss": 0.0685, "theoretical_loss": 3.3866741686121875, "tokens_seen": 2598371328 }, { "epoch": 0.79, "learning_rate": 0.00021465254373294817, "loss": 0.0645, "theoretical_loss": 3.3866473536579473, "tokens_seen": 2598633472 }, { "epoch": 0.79, "learning_rate": 0.00021457229979136576, "loss": 0.0684, "theoretical_loss": 3.3866205421659217, "tokens_seen": 2598895616 }, { "epoch": 0.79, "learning_rate": 0.00021449205584978334, "loss": 0.0675, "theoretical_loss": 3.386593734135313, "tokens_seen": 2599157760 }, { "epoch": 0.79, "learning_rate": 0.00021441181190820095, "loss": 0.0674, "theoretical_loss": 3.3865669295653262, "tokens_seen": 2599419904 }, { "epoch": 0.79, "learning_rate": 0.0002143315679666185, "loss": 0.0625, "theoretical_loss": 3.3865401284551657, "tokens_seen": 2599682048 }, { "epoch": 0.79, "learning_rate": 0.0002142513240250361, "loss": 0.0662, "theoretical_loss": 3.3865133308040356, "tokens_seen": 2599944192 }, { "epoch": 0.79, "learning_rate": 0.0002141710800834537, "loss": 0.0673, "theoretical_loss": 3.3864865366111414, "tokens_seen": 2600206336 }, { "epoch": 0.79, "learning_rate": 0.0002140908361418713, "loss": 0.0626, "theoretical_loss": 3.3864597458756878, "tokens_seen": 2600468480 }, { "epoch": 0.79, "learning_rate": 0.00021401059220028888, "loss": 0.0674, "theoretical_loss": 3.38643295859688, "tokens_seen": 2600730624 }, { "epoch": 0.79, "learning_rate": 0.00021393034825870647, "loss": 0.0695, "theoretical_loss": 3.3864061747739242, "tokens_seen": 2600992768 }, { "epoch": 0.79, "learning_rate": 0.00021385010431712408, "loss": 0.0695, "theoretical_loss": 3.386379394406026, "tokens_seen": 2601254912 }, { "epoch": 0.79, "learning_rate": 0.00021376986037554166, "loss": 0.0648, "theoretical_loss": 3.3863526174923915, "tokens_seen": 2601517056 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.0008119245758280158, "objective/train/docs_used": 946148, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.417919635772705, "objective/train/original_loss": 1.4179197549819946, "objective/train/theoretical_loss": 3.3863258440322275, "objective/train/tokens_used": 2622239200, "objective/train/value_avg": -0.0088043212890625, "objective/train/value_loss": 0.0001911604340421036, "objective/train/value_max": -3.707408905029297e-05, "objective/train/value_min": -0.51513671875, "objective/train/value_reward_corr": 0.7656120068838768, "objective/train/value_std": 0.016815185546875, "objective/train/weight_avg": 1.0009019374847412, "objective/train/weighted_lm_loss": 1.4192315340042114, "objective/train/weights_max": 1.3128715753555298, "objective/train/weights_min": 0.3708581030368805, "theoretical_loss": 3.3863258440322275, "tokens_seen": 2601779200 }, { "epoch": 0.79, "learning_rate": 0.00021368961643395922, "loss": 0.0668, "theoretical_loss": 3.3863258440322275, "tokens_seen": 2601779200 }, { "epoch": 0.79, "learning_rate": 0.00021360937249237684, "loss": 0.0676, "theoretical_loss": 3.3862990740247403, "tokens_seen": 2602041344 }, { "epoch": 0.79, "learning_rate": 0.00021352912855079442, "loss": 0.0653, "theoretical_loss": 3.3862723074691377, "tokens_seen": 2602303488 }, { "epoch": 0.79, "learning_rate": 0.000213448884609212, "loss": 0.0705, "theoretical_loss": 3.3862455443646255, "tokens_seen": 2602565632 }, { "epoch": 0.79, "learning_rate": 0.0002133686406676296, "loss": 0.0683, "theoretical_loss": 3.3862187847104126, "tokens_seen": 2602827776 }, { "epoch": 0.79, "learning_rate": 0.00021328839672604718, "loss": 0.0651, "theoretical_loss": 3.3861920285057057, "tokens_seen": 2603089920 }, { "epoch": 0.79, "learning_rate": 0.0002132081527844648, "loss": 0.0661, "theoretical_loss": 3.386165275749714, "tokens_seen": 2603352064 }, { "epoch": 0.79, "learning_rate": 0.00021312790884288235, "loss": 0.0651, "theoretical_loss": 3.3861385264416444, "tokens_seen": 2603614208 }, { "epoch": 0.79, "learning_rate": 0.00021304766490129996, "loss": 0.066, "theoretical_loss": 3.3861117805807064, "tokens_seen": 2603876352 }, { "epoch": 0.79, "learning_rate": 0.00021296742095971755, "loss": 0.0662, "theoretical_loss": 3.3860850381661085, "tokens_seen": 2604138496 }, { "epoch": 0.79, "learning_rate": 0.00021288717701813516, "loss": 0.0675, "theoretical_loss": 3.3860582991970594, "tokens_seen": 2604400640 }, { "epoch": 0.79, "learning_rate": 0.00021280693307655272, "loss": 0.0667, "theoretical_loss": 3.3860315636727694, "tokens_seen": 2604662784 }, { "epoch": 0.79, "learning_rate": 0.0002127266891349703, "loss": 0.0668, "theoretical_loss": 3.386004831592447, "tokens_seen": 2604924928 }, { "epoch": 0.79, "learning_rate": 0.00021264644519338792, "loss": 0.0639, "theoretical_loss": 3.385978102955303, "tokens_seen": 2605187072 }, { "epoch": 0.79, "learning_rate": 0.00021256620125180547, "loss": 0.0647, "theoretical_loss": 3.3859513777605468, "tokens_seen": 2605449216 }, { "epoch": 0.79, "learning_rate": 0.0002124859573102231, "loss": 0.0651, "theoretical_loss": 3.385924656007389, "tokens_seen": 2605711360 }, { "epoch": 0.79, "learning_rate": 0.00021240571336864067, "loss": 0.0658, "theoretical_loss": 3.38589793769504, "tokens_seen": 2605973504 }, { "epoch": 0.79, "learning_rate": 0.00021232546942705826, "loss": 0.0668, "theoretical_loss": 3.3858712228227117, "tokens_seen": 2606235648 }, { "epoch": 0.79, "learning_rate": 0.00021224522548547584, "loss": 0.0632, "theoretical_loss": 3.385844511389614, "tokens_seen": 2606497792 }, { "epoch": 0.79, "learning_rate": 0.00021216498154389343, "loss": 0.0694, "theoretical_loss": 3.385817803394959, "tokens_seen": 2606759936 }, { "epoch": 0.79, "learning_rate": 0.00021208473760231104, "loss": 0.0651, "theoretical_loss": 3.3857910988379576, "tokens_seen": 2607022080 }, { "epoch": 0.79, "learning_rate": 0.00021200449366072863, "loss": 0.0655, "theoretical_loss": 3.385764397717823, "tokens_seen": 2607284224 }, { "epoch": 0.79, "learning_rate": 0.0002119242497191462, "loss": 0.066, "theoretical_loss": 3.3857377000337663, "tokens_seen": 2607546368 }, { "epoch": 0.79, "learning_rate": 0.0002118440057775638, "loss": 0.0633, "theoretical_loss": 3.3857110057850006, "tokens_seen": 2607808512 }, { "epoch": 0.79, "learning_rate": 0.00021176376183598138, "loss": 0.0662, "theoretical_loss": 3.3856843149707383, "tokens_seen": 2608070656 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.0013975916663184762, "objective/train/docs_used": 948381, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3176509141921997, "objective/train/original_loss": 1.3176510334014893, "objective/train/theoretical_loss": 3.3856576275901924, "objective/train/tokens_used": 2628792800, "objective/train/value_avg": -0.00994873046875, "objective/train/value_loss": 0.00038583975401706994, "objective/train/value_max": -2.0325183868408203e-05, "objective/train/value_min": -0.93505859375, "objective/train/value_reward_corr": 0.7545843172896143, "objective/train/value_std": 0.0260009765625, "objective/train/weight_avg": 1.001584529876709, "objective/train/weighted_lm_loss": 1.3192466497421265, "objective/train/weights_max": 2.2463696002960205, "objective/train/weights_min": 0.371359258890152, "theoretical_loss": 3.3856576275901924, "tokens_seen": 2608332800 }, { "epoch": 0.79, "learning_rate": 0.00021168351789439897, "loss": 0.0635, "theoretical_loss": 3.3856576275901924, "tokens_seen": 2608332800 }, { "epoch": 0.79, "learning_rate": 0.00021160327395281655, "loss": 0.0665, "theoretical_loss": 3.385630943642576, "tokens_seen": 2608594944 }, { "epoch": 0.79, "learning_rate": 0.00021152303001123417, "loss": 0.0648, "theoretical_loss": 3.3856042631271026, "tokens_seen": 2608857088 }, { "epoch": 0.79, "learning_rate": 0.00021144278606965175, "loss": 0.066, "theoretical_loss": 3.385577586042986, "tokens_seen": 2609119232 }, { "epoch": 0.79, "learning_rate": 0.0002113625421280693, "loss": 0.066, "theoretical_loss": 3.385550912389441, "tokens_seen": 2609381376 }, { "epoch": 0.79, "learning_rate": 0.00021128229818648692, "loss": 0.0671, "theoretical_loss": 3.385524242165681, "tokens_seen": 2609643520 }, { "epoch": 0.79, "learning_rate": 0.0002112020542449045, "loss": 0.0635, "theoretical_loss": 3.3854975753709207, "tokens_seen": 2609905664 }, { "epoch": 0.79, "learning_rate": 0.00021112181030332212, "loss": 0.0671, "theoretical_loss": 3.3854709120043744, "tokens_seen": 2610167808 }, { "epoch": 0.79, "learning_rate": 0.00021104156636173968, "loss": 0.0652, "theoretical_loss": 3.3854442520652577, "tokens_seen": 2610429952 }, { "epoch": 0.79, "learning_rate": 0.0002109613224201573, "loss": 0.0653, "theoretical_loss": 3.385417595552786, "tokens_seen": 2610692096 }, { "epoch": 0.79, "learning_rate": 0.00021088107847857488, "loss": 0.0673, "theoretical_loss": 3.3853909424661746, "tokens_seen": 2610954240 }, { "epoch": 0.79, "learning_rate": 0.00021080083453699246, "loss": 0.0677, "theoretical_loss": 3.3853642928046397, "tokens_seen": 2611216384 }, { "epoch": 0.79, "learning_rate": 0.00021072059059541005, "loss": 0.0668, "theoretical_loss": 3.3853376465673968, "tokens_seen": 2611478528 }, { "epoch": 0.79, "learning_rate": 0.00021064034665382763, "loss": 0.0699, "theoretical_loss": 3.3853110037536625, "tokens_seen": 2611740672 }, { "epoch": 0.79, "learning_rate": 0.00021056010271224525, "loss": 0.068, "theoretical_loss": 3.3852843643626533, "tokens_seen": 2612002816 }, { "epoch": 0.79, "learning_rate": 0.0002104798587706628, "loss": 0.0691, "theoretical_loss": 3.3852577283935865, "tokens_seen": 2612264960 }, { "epoch": 0.79, "learning_rate": 0.0002103996148290804, "loss": 0.068, "theoretical_loss": 3.385231095845678, "tokens_seen": 2612527104 }, { "epoch": 0.79, "learning_rate": 0.000210319370887498, "loss": 0.0625, "theoretical_loss": 3.385204466718147, "tokens_seen": 2612789248 }, { "epoch": 0.79, "learning_rate": 0.0002102391269459156, "loss": 0.0661, "theoretical_loss": 3.3851778410102096, "tokens_seen": 2613051392 }, { "epoch": 0.79, "learning_rate": 0.00021015888300433317, "loss": 0.0673, "theoretical_loss": 3.3851512187210844, "tokens_seen": 2613313536 }, { "epoch": 0.79, "learning_rate": 0.00021007863906275076, "loss": 0.066, "theoretical_loss": 3.3851245998499895, "tokens_seen": 2613575680 }, { "epoch": 0.79, "learning_rate": 0.00020999839512116837, "loss": 0.0669, "theoretical_loss": 3.385097984396143, "tokens_seen": 2613837824 }, { "epoch": 0.79, "learning_rate": 0.00020991815117958596, "loss": 0.0657, "theoretical_loss": 3.3850713723587633, "tokens_seen": 2614099968 }, { "epoch": 0.79, "learning_rate": 0.00020983790723800352, "loss": 0.0649, "theoretical_loss": 3.38504476373707, "tokens_seen": 2614362112 }, { "epoch": 0.79, "learning_rate": 0.00020975766329642113, "loss": 0.0652, "theoretical_loss": 3.3850181585302823, "tokens_seen": 2614624256 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.0011500355321913958, "objective/train/docs_used": 950739, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3640177249908447, "objective/train/original_loss": 1.3640177249908447, "objective/train/theoretical_loss": 3.384991556737619, "objective/train/tokens_used": 2635346400, "objective/train/value_avg": -0.007099151611328125, "objective/train/value_loss": 0.00024195798323489726, "objective/train/value_max": -2.753734588623047e-05, "objective/train/value_min": -0.94287109375, "objective/train/value_reward_corr": 0.7123578471830624, "objective/train/value_std": 0.0147705078125, "objective/train/weight_avg": 1.0012575387954712, "objective/train/weighted_lm_loss": 1.365799069404602, "objective/train/weights_max": 2.00335693359375, "objective/train/weights_min": 0.3683691918849945, "theoretical_loss": 3.384991556737619, "tokens_seen": 2614886400 }, { "epoch": 0.79, "learning_rate": 0.00020967741935483871, "loss": 0.0635, "theoretical_loss": 3.384991556737619, "tokens_seen": 2614886400 }, { "epoch": 0.79, "learning_rate": 0.0002095971754132563, "loss": 0.0666, "theoretical_loss": 3.3849649583583004, "tokens_seen": 2615148544 }, { "epoch": 0.79, "learning_rate": 0.00020951693147167389, "loss": 0.0657, "theoretical_loss": 3.3849383633915457, "tokens_seen": 2615410688 }, { "epoch": 0.79, "learning_rate": 0.0002094366875300915, "loss": 0.065, "theoretical_loss": 3.3849117718365758, "tokens_seen": 2615672832 }, { "epoch": 0.79, "learning_rate": 0.00020935644358850908, "loss": 0.067, "theoretical_loss": 3.38488518369261, "tokens_seen": 2615934976 }, { "epoch": 0.79, "learning_rate": 0.00020927619964692664, "loss": 0.0653, "theoretical_loss": 3.384858598958871, "tokens_seen": 2616197120 }, { "epoch": 0.79, "learning_rate": 0.00020919595570534426, "loss": 0.0656, "theoretical_loss": 3.384832017634578, "tokens_seen": 2616459264 }, { "epoch": 0.79, "learning_rate": 0.00020911571176376184, "loss": 0.065, "theoretical_loss": 3.384805439718953, "tokens_seen": 2616721408 }, { "epoch": 0.79, "learning_rate": 0.00020903546782217945, "loss": 0.0656, "theoretical_loss": 3.384778865211217, "tokens_seen": 2616983552 }, { "epoch": 0.79, "learning_rate": 0.000208955223880597, "loss": 0.0662, "theoretical_loss": 3.3847522941105925, "tokens_seen": 2617245696 }, { "epoch": 0.79, "learning_rate": 0.0002088749799390146, "loss": 0.0644, "theoretical_loss": 3.384725726416301, "tokens_seen": 2617507840 }, { "epoch": 0.79, "learning_rate": 0.0002087947359974322, "loss": 0.0647, "theoretical_loss": 3.384699162127564, "tokens_seen": 2617769984 }, { "epoch": 0.79, "learning_rate": 0.00020871449205584977, "loss": 0.0672, "theoretical_loss": 3.3846726012436057, "tokens_seen": 2618032128 }, { "epoch": 0.79, "learning_rate": 0.00020863424811426738, "loss": 0.0637, "theoretical_loss": 3.3846460437636474, "tokens_seen": 2618294272 }, { "epoch": 0.79, "learning_rate": 0.00020855400417268497, "loss": 0.0634, "theoretical_loss": 3.3846194896869126, "tokens_seen": 2618556416 }, { "epoch": 0.79, "learning_rate": 0.00020847376023110258, "loss": 0.0683, "theoretical_loss": 3.3845929390126246, "tokens_seen": 2618818560 }, { "epoch": 0.79, "learning_rate": 0.00020839351628952014, "loss": 0.0672, "theoretical_loss": 3.384566391740007, "tokens_seen": 2619080704 }, { "epoch": 0.79, "learning_rate": 0.00020831327234793772, "loss": 0.0674, "theoretical_loss": 3.3845398478682833, "tokens_seen": 2619342848 }, { "epoch": 0.79, "learning_rate": 0.00020823302840635534, "loss": 0.0637, "theoretical_loss": 3.384513307396678, "tokens_seen": 2619604992 }, { "epoch": 0.79, "learning_rate": 0.00020815278446477292, "loss": 0.0647, "theoretical_loss": 3.384486770324415, "tokens_seen": 2619867136 }, { "epoch": 0.79, "learning_rate": 0.0002080725405231905, "loss": 0.0658, "theoretical_loss": 3.3844602366507184, "tokens_seen": 2620129280 }, { "epoch": 0.79, "learning_rate": 0.0002079922965816081, "loss": 0.0667, "theoretical_loss": 3.3844337063748138, "tokens_seen": 2620391424 }, { "epoch": 0.79, "learning_rate": 0.00020791205264002568, "loss": 0.0682, "theoretical_loss": 3.384407179495926, "tokens_seen": 2620653568 }, { "epoch": 0.79, "learning_rate": 0.00020783180869844326, "loss": 0.0634, "theoretical_loss": 3.38438065601328, "tokens_seen": 2620915712 }, { "epoch": 0.79, "learning_rate": 0.00020775156475686085, "loss": 0.0629, "theoretical_loss": 3.3843541359261016, "tokens_seen": 2621177856 }, { "debugging/Compilability": 1.0, "debugging/distinct-1-grams": 0.7381111704358672, "debugging/entropy-1-grams": 5.723658601072414, "debugging/length": 472.25, "debugging/num_segments": 24, "debugging/raw_token_scores_avg": 0.007372692227363586, "debugging/raw_token_scores_std": 0.024886412546038628, "debugging/score": 0.004950147325261966, "debugging/score_std": 0.005382910365563692, "epoch": 0.79, "objective/train/advantage_avg": 0.0008712936541996896, "objective/train/docs_used": 953088, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3545817136764526, "objective/train/original_loss": 1.3545817136764526, "objective/train/theoretical_loss": 3.3843276192336167, "objective/train/tokens_used": 2641900000, "objective/train/value_avg": -0.00824737548828125, "objective/train/value_loss": 0.0002766008547041565, "objective/train/value_max": -2.9981136322021484e-05, "objective/train/value_min": -0.826171875, "objective/train/value_reward_corr": 0.744913716275677, "objective/train/value_std": 0.0185394287109375, "objective/train/weight_avg": 1.0009992122650146, "objective/train/weighted_lm_loss": 1.3555899858474731, "objective/train/weights_max": 2.2153494358062744, "objective/train/weights_min": 0.38233569264411926, "theoretical_loss": 3.3843276192336167, "tokens_seen": 2621440000 }, { "epoch": 0.79, "learning_rate": 0.00020767132081527846, "loss": 0.0665, "theoretical_loss": 3.3843276192336167, "tokens_seen": 2621440000 }, { "epoch": 0.79, "learning_rate": 0.00020759107687369605, "loss": 0.0635, "theoretical_loss": 3.3843011059350507, "tokens_seen": 2621702144 }, { "epoch": 0.79, "learning_rate": 0.00020751083293211363, "loss": 0.0655, "theoretical_loss": 3.3842745960296314, "tokens_seen": 2621964288 }, { "epoch": 0.79, "learning_rate": 0.00020743058899053122, "loss": 0.0652, "theoretical_loss": 3.384248089516584, "tokens_seen": 2622226432 }, { "epoch": 0.79, "learning_rate": 0.0002073503450489488, "loss": 0.0629, "theoretical_loss": 3.3842215863951353, "tokens_seen": 2622488576 }, { "epoch": 0.79, "learning_rate": 0.00020727010110736642, "loss": 0.0642, "theoretical_loss": 3.384195086664513, "tokens_seen": 2622750720 }, { "epoch": 0.79, "learning_rate": 0.00020718985716578397, "loss": 0.0647, "theoretical_loss": 3.3841685903239447, "tokens_seen": 2623012864 }, { "epoch": 0.79, "learning_rate": 0.0002071096132242016, "loss": 0.0688, "theoretical_loss": 3.3841420973726573, "tokens_seen": 2623275008 }, { "epoch": 0.8, "learning_rate": 0.00020702936928261917, "loss": 0.0686, "theoretical_loss": 3.3841156078098784, "tokens_seen": 2623537152 }, { "epoch": 0.8, "learning_rate": 0.00020694912534103673, "loss": 0.0673, "theoretical_loss": 3.384089121634837, "tokens_seen": 2623799296 }, { "epoch": 0.8, "learning_rate": 0.00020686888139945434, "loss": 0.066, "theoretical_loss": 3.384062638846761, "tokens_seen": 2624061440 }, { "epoch": 0.8, "learning_rate": 0.00020678863745787193, "loss": 0.0667, "theoretical_loss": 3.3840361594448787, "tokens_seen": 2624323584 }, { "epoch": 0.8, "learning_rate": 0.00020670839351628954, "loss": 0.0664, "theoretical_loss": 3.3840096834284195, "tokens_seen": 2624585728 }, { "epoch": 0.8, "learning_rate": 0.0002066281495747071, "loss": 0.0668, "theoretical_loss": 3.383983210796612, "tokens_seen": 2624847872 }, { "epoch": 0.8, "learning_rate": 0.0002065479056331247, "loss": 0.0687, "theoretical_loss": 3.383956741548686, "tokens_seen": 2625110016 }, { "epoch": 0.8, "learning_rate": 0.0002064676616915423, "loss": 0.0667, "theoretical_loss": 3.3839302756838707, "tokens_seen": 2625372160 }, { "epoch": 0.8, "learning_rate": 0.00020638741774995988, "loss": 0.0675, "theoretical_loss": 3.383903813201396, "tokens_seen": 2625634304 }, { "epoch": 0.8, "learning_rate": 0.00020630717380837747, "loss": 0.0636, "theoretical_loss": 3.383877354100492, "tokens_seen": 2625896448 }, { "epoch": 0.8, "learning_rate": 0.00020622692986679505, "loss": 0.0678, "theoretical_loss": 3.3838508983803894, "tokens_seen": 2626158592 }, { "epoch": 0.8, "learning_rate": 0.00020614668592521267, "loss": 0.0665, "theoretical_loss": 3.383824446040319, "tokens_seen": 2626420736 }, { "epoch": 0.8, "learning_rate": 0.00020606644198363025, "loss": 0.0647, "theoretical_loss": 3.383797997079511, "tokens_seen": 2626682880 }, { "epoch": 0.8, "learning_rate": 0.0002059861980420478, "loss": 0.0664, "theoretical_loss": 3.383771551497196, "tokens_seen": 2626945024 }, { "epoch": 0.8, "learning_rate": 0.00020590595410046542, "loss": 0.0673, "theoretical_loss": 3.383745109292607, "tokens_seen": 2627207168 }, { "epoch": 0.8, "learning_rate": 0.000205825710158883, "loss": 0.0657, "theoretical_loss": 3.3837186704649747, "tokens_seen": 2627469312 }, { "epoch": 0.8, "learning_rate": 0.0002057454662173006, "loss": 0.0654, "theoretical_loss": 3.383692235013531, "tokens_seen": 2627731456 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.0006061241729184985, "objective/train/docs_used": 955375, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3414796590805054, "objective/train/original_loss": 1.341479778289795, "objective/train/theoretical_loss": 3.3836658029375077, "objective/train/tokens_used": 2648453600, "objective/train/value_avg": -0.00621795654296875, "objective/train/value_loss": 0.00018008060578722507, "objective/train/value_max": -4.6133995056152344e-05, "objective/train/value_min": -0.488525390625, "objective/train/value_reward_corr": 0.7103285800180809, "objective/train/value_std": 0.01334381103515625, "objective/train/weight_avg": 1.0006861686706543, "objective/train/weighted_lm_loss": 1.3422250747680664, "objective/train/weights_max": 1.2228349447250366, "objective/train/weights_min": 0.37098827958106995, "theoretical_loss": 3.3836658029375077, "tokens_seen": 2627993600 }, { "epoch": 0.8, "learning_rate": 0.00020566522227571818, "loss": 0.066, "theoretical_loss": 3.3836658029375077, "tokens_seen": 2627993600 }, { "epoch": 0.8, "learning_rate": 0.0002055849783341358, "loss": 0.0655, "theoretical_loss": 3.3836393742361377, "tokens_seen": 2628255744 }, { "epoch": 0.8, "learning_rate": 0.00020550473439255338, "loss": 0.0659, "theoretical_loss": 3.3836129489086537, "tokens_seen": 2628517888 }, { "epoch": 0.8, "learning_rate": 0.00020542449045097094, "loss": 0.065, "theoretical_loss": 3.3835865269542884, "tokens_seen": 2628780032 }, { "epoch": 0.8, "learning_rate": 0.00020534424650938855, "loss": 0.0651, "theoretical_loss": 3.3835601083722744, "tokens_seen": 2629042176 }, { "epoch": 0.8, "learning_rate": 0.00020526400256780613, "loss": 0.0632, "theoretical_loss": 3.3835336931618456, "tokens_seen": 2629304320 }, { "epoch": 0.8, "learning_rate": 0.00020518375862622375, "loss": 0.0666, "theoretical_loss": 3.3835072813222355, "tokens_seen": 2629566464 }, { "epoch": 0.8, "learning_rate": 0.0002051035146846413, "loss": 0.0642, "theoretical_loss": 3.3834808728526786, "tokens_seen": 2629828608 }, { "epoch": 0.8, "learning_rate": 0.0002050232707430589, "loss": 0.0657, "theoretical_loss": 3.383454467752408, "tokens_seen": 2630090752 }, { "epoch": 0.8, "learning_rate": 0.0002049430268014765, "loss": 0.0644, "theoretical_loss": 3.3834280660206586, "tokens_seen": 2630352896 }, { "epoch": 0.8, "learning_rate": 0.00020486278285989406, "loss": 0.0628, "theoretical_loss": 3.3834016676566647, "tokens_seen": 2630615040 }, { "epoch": 0.8, "learning_rate": 0.00020478253891831167, "loss": 0.0684, "theoretical_loss": 3.383375272659662, "tokens_seen": 2630877184 }, { "epoch": 0.8, "learning_rate": 0.00020470229497672926, "loss": 0.0655, "theoretical_loss": 3.3833488810288843, "tokens_seen": 2631139328 }, { "epoch": 0.8, "learning_rate": 0.00020462205103514687, "loss": 0.0679, "theoretical_loss": 3.3833224927635683, "tokens_seen": 2631401472 }, { "epoch": 0.8, "learning_rate": 0.00020454180709356443, "loss": 0.0656, "theoretical_loss": 3.3832961078629493, "tokens_seen": 2631663616 }, { "epoch": 0.8, "learning_rate": 0.00020446156315198202, "loss": 0.0647, "theoretical_loss": 3.3832697263262625, "tokens_seen": 2631925760 }, { "epoch": 0.8, "learning_rate": 0.00020438131921039963, "loss": 0.0665, "theoretical_loss": 3.3832433481527446, "tokens_seen": 2632187904 }, { "epoch": 0.8, "learning_rate": 0.00020430107526881721, "loss": 0.0669, "theoretical_loss": 3.383216973341632, "tokens_seen": 2632450048 }, { "epoch": 0.8, "learning_rate": 0.0002042208313272348, "loss": 0.0645, "theoretical_loss": 3.383190601892161, "tokens_seen": 2632712192 }, { "epoch": 0.8, "learning_rate": 0.00020414058738565239, "loss": 0.0641, "theoretical_loss": 3.383164233803569, "tokens_seen": 2632974336 }, { "epoch": 0.8, "learning_rate": 0.00020406034344406997, "loss": 0.0631, "theoretical_loss": 3.3831378690750924, "tokens_seen": 2633236480 }, { "epoch": 0.8, "learning_rate": 0.00020398009950248756, "loss": 0.0639, "theoretical_loss": 3.383111507705969, "tokens_seen": 2633498624 }, { "epoch": 0.8, "learning_rate": 0.00020389985556090514, "loss": 0.0621, "theoretical_loss": 3.3830851496954364, "tokens_seen": 2633760768 }, { "epoch": 0.8, "learning_rate": 0.00020381961161932276, "loss": 0.0646, "theoretical_loss": 3.383058795042732, "tokens_seen": 2634022912 }, { "epoch": 0.8, "learning_rate": 0.00020373936767774034, "loss": 0.0658, "theoretical_loss": 3.3830324437470956, "tokens_seen": 2634285056 }, { "epoch": 0.8, "objective/train/advantage_avg": -0.000395024981116876, "objective/train/docs_used": 957617, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.215680718421936, "objective/train/original_loss": 1.2156805992126465, "objective/train/theoretical_loss": 3.3830060958077635, "objective/train/tokens_used": 2655007200, "objective/train/value_avg": -0.00763702392578125, "objective/train/value_loss": 0.00011784796515712515, "objective/train/value_max": -2.3186206817626953e-05, "objective/train/value_min": -0.247314453125, "objective/train/value_reward_corr": 0.8557826421660346, "objective/train/value_std": 0.0171051025390625, "objective/train/weight_avg": 0.9996615052223206, "objective/train/weighted_lm_loss": 1.2155773639678955, "objective/train/weights_max": 1.1127924919128418, "objective/train/weights_min": 0.38944852352142334, "theoretical_loss": 3.3830060958077635, "tokens_seen": 2634547200 }, { "epoch": 0.8, "learning_rate": 0.00020365912373615793, "loss": 0.064, "theoretical_loss": 3.3830060958077635, "tokens_seen": 2634547200 }, { "epoch": 0.8, "learning_rate": 0.0002035788797945755, "loss": 0.0674, "theoretical_loss": 3.382979751223975, "tokens_seen": 2634809344 }, { "epoch": 0.8, "learning_rate": 0.0002034986358529931, "loss": 0.0656, "theoretical_loss": 3.3829534099949696, "tokens_seen": 2635071488 }, { "epoch": 0.8, "learning_rate": 0.0002034183919114107, "loss": 0.0636, "theoretical_loss": 3.382927072119986, "tokens_seen": 2635333632 }, { "epoch": 0.8, "learning_rate": 0.00020333814796982827, "loss": 0.0654, "theoretical_loss": 3.382900737598263, "tokens_seen": 2635595776 }, { "epoch": 0.8, "learning_rate": 0.00020325790402824588, "loss": 0.0666, "theoretical_loss": 3.3828744064290412, "tokens_seen": 2635857920 }, { "epoch": 0.8, "learning_rate": 0.00020317766008666347, "loss": 0.0637, "theoretical_loss": 3.38284807861156, "tokens_seen": 2636120064 }, { "epoch": 0.8, "learning_rate": 0.00020309741614508102, "loss": 0.0628, "theoretical_loss": 3.382821754145059, "tokens_seen": 2636382208 }, { "epoch": 0.8, "learning_rate": 0.00020301717220349864, "loss": 0.066, "theoretical_loss": 3.3827954330287797, "tokens_seen": 2636644352 }, { "epoch": 0.8, "learning_rate": 0.00020293692826191622, "loss": 0.0662, "theoretical_loss": 3.3827691152619614, "tokens_seen": 2636906496 }, { "epoch": 0.8, "learning_rate": 0.00020285668432033384, "loss": 0.0649, "theoretical_loss": 3.382742800843846, "tokens_seen": 2637168640 }, { "epoch": 0.8, "learning_rate": 0.0002027764403787514, "loss": 0.0641, "theoretical_loss": 3.382716489773674, "tokens_seen": 2637430784 }, { "epoch": 0.8, "learning_rate": 0.000202696196437169, "loss": 0.0657, "theoretical_loss": 3.3826901820506867, "tokens_seen": 2637692928 }, { "epoch": 0.8, "learning_rate": 0.0002026159524955866, "loss": 0.0618, "theoretical_loss": 3.3826638776741262, "tokens_seen": 2637955072 }, { "epoch": 0.8, "learning_rate": 0.00020253570855400418, "loss": 0.0665, "theoretical_loss": 3.3826375766432344, "tokens_seen": 2638217216 }, { "epoch": 0.8, "learning_rate": 0.00020245546461242176, "loss": 0.064, "theoretical_loss": 3.3826112789572527, "tokens_seen": 2638479360 }, { "epoch": 0.8, "learning_rate": 0.00020237522067083935, "loss": 0.0645, "theoretical_loss": 3.382584984615424, "tokens_seen": 2638741504 }, { "epoch": 0.8, "learning_rate": 0.00020229497672925696, "loss": 0.0645, "theoretical_loss": 3.3825586936169905, "tokens_seen": 2639003648 }, { "epoch": 0.8, "learning_rate": 0.00020221473278767452, "loss": 0.0656, "theoretical_loss": 3.382532405961195, "tokens_seen": 2639265792 }, { "epoch": 0.8, "learning_rate": 0.0002021344888460921, "loss": 0.0646, "theoretical_loss": 3.3825061216472814, "tokens_seen": 2639527936 }, { "epoch": 0.8, "learning_rate": 0.00020205424490450972, "loss": 0.0637, "theoretical_loss": 3.382479840674492, "tokens_seen": 2639790080 }, { "epoch": 0.8, "learning_rate": 0.0002019740009629273, "loss": 0.0632, "theoretical_loss": 3.3824535630420707, "tokens_seen": 2640052224 }, { "epoch": 0.8, "learning_rate": 0.0002018937570213449, "loss": 0.064, "theoretical_loss": 3.3824272887492612, "tokens_seen": 2640314368 }, { "epoch": 0.8, "learning_rate": 0.00020181351307976247, "loss": 0.0659, "theoretical_loss": 3.382401017795308, "tokens_seen": 2640576512 }, { "epoch": 0.8, "learning_rate": 0.0002017332691381801, "loss": 0.0635, "theoretical_loss": 3.382374750179455, "tokens_seen": 2640838656 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.00020698863954748958, "objective/train/docs_used": 959716, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2583776712417603, "objective/train/original_loss": 1.2583776712417603, "objective/train/theoretical_loss": 3.3823484859009474, "objective/train/tokens_used": 2661560800, "objective/train/value_avg": -0.006702423095703125, "objective/train/value_loss": 0.00013055212912149727, "objective/train/value_max": -2.485513687133789e-05, "objective/train/value_min": -0.459716796875, "objective/train/value_reward_corr": 0.760377423537848, "objective/train/value_std": 0.01239776611328125, "objective/train/weight_avg": 1.0002684593200684, "objective/train/weighted_lm_loss": 1.2586296796798706, "objective/train/weights_max": 1.1409211158752441, "objective/train/weights_min": 0.39653629064559937, "theoretical_loss": 3.3823484859009474, "tokens_seen": 2641100800 }, { "epoch": 0.8, "learning_rate": 0.00020165302519659767, "loss": 0.0615, "theoretical_loss": 3.3823484859009474, "tokens_seen": 2641100800 }, { "epoch": 0.8, "learning_rate": 0.00020157278125501523, "loss": 0.066, "theoretical_loss": 3.3823222249590286, "tokens_seen": 2641362944 }, { "epoch": 0.8, "learning_rate": 0.00020149253731343284, "loss": 0.0637, "theoretical_loss": 3.3822959673529454, "tokens_seen": 2641625088 }, { "epoch": 0.8, "learning_rate": 0.00020141229337185043, "loss": 0.0624, "theoretical_loss": 3.3822697130819415, "tokens_seen": 2641887232 }, { "epoch": 0.8, "learning_rate": 0.00020133204943026804, "loss": 0.0667, "theoretical_loss": 3.3822434621452637, "tokens_seen": 2642149376 }, { "epoch": 0.8, "learning_rate": 0.0002012518054886856, "loss": 0.0638, "theoretical_loss": 3.382217214542157, "tokens_seen": 2642411520 }, { "epoch": 0.8, "learning_rate": 0.0002011715615471032, "loss": 0.0613, "theoretical_loss": 3.3821909702718678, "tokens_seen": 2642673664 }, { "epoch": 0.8, "learning_rate": 0.0002010913176055208, "loss": 0.0609, "theoretical_loss": 3.382164729333642, "tokens_seen": 2642935808 }, { "epoch": 0.8, "learning_rate": 0.00020101107366393836, "loss": 0.066, "theoretical_loss": 3.3821384917267263, "tokens_seen": 2643197952 }, { "epoch": 0.8, "learning_rate": 0.00020093082972235597, "loss": 0.0631, "theoretical_loss": 3.382112257450368, "tokens_seen": 2643460096 }, { "epoch": 0.8, "learning_rate": 0.00020085058578077355, "loss": 0.0616, "theoretical_loss": 3.382086026503813, "tokens_seen": 2643722240 }, { "epoch": 0.8, "learning_rate": 0.00020077034183919117, "loss": 0.0643, "theoretical_loss": 3.3820597988863095, "tokens_seen": 2643984384 }, { "epoch": 0.8, "learning_rate": 0.00020069009789760873, "loss": 0.0658, "theoretical_loss": 3.382033574597105, "tokens_seen": 2644246528 }, { "epoch": 0.8, "learning_rate": 0.0002006098539560263, "loss": 0.0634, "theoretical_loss": 3.3820073536354465, "tokens_seen": 2644508672 }, { "epoch": 0.8, "learning_rate": 0.00020052961001444392, "loss": 0.0652, "theoretical_loss": 3.381981136000582, "tokens_seen": 2644770816 }, { "epoch": 0.8, "learning_rate": 0.0002004493660728615, "loss": 0.0659, "theoretical_loss": 3.3819549216917606, "tokens_seen": 2645032960 }, { "epoch": 0.8, "learning_rate": 0.0002003691221312791, "loss": 0.0657, "theoretical_loss": 3.3819287107082303, "tokens_seen": 2645295104 }, { "epoch": 0.8, "learning_rate": 0.00020028887818969668, "loss": 0.0654, "theoretical_loss": 3.38190250304924, "tokens_seen": 2645557248 }, { "epoch": 0.8, "learning_rate": 0.0002002086342481143, "loss": 0.0647, "theoretical_loss": 3.381876298714038, "tokens_seen": 2645819392 }, { "epoch": 0.8, "learning_rate": 0.00020012839030653185, "loss": 0.0627, "theoretical_loss": 3.3818500977018746, "tokens_seen": 2646081536 }, { "epoch": 0.8, "learning_rate": 0.00020004814636494944, "loss": 0.0617, "theoretical_loss": 3.381823900011998, "tokens_seen": 2646343680 }, { "epoch": 0.8, "learning_rate": 0.00019996790242336705, "loss": 0.0614, "theoretical_loss": 3.381797705643659, "tokens_seen": 2646605824 }, { "epoch": 0.8, "learning_rate": 0.00019988765848178463, "loss": 0.0643, "theoretical_loss": 3.3817715145961067, "tokens_seen": 2646867968 }, { "epoch": 0.8, "learning_rate": 0.00019980741454020222, "loss": 0.0639, "theoretical_loss": 3.3817453268685918, "tokens_seen": 2647130112 }, { "epoch": 0.8, "learning_rate": 0.0001997271705986198, "loss": 0.0634, "theoretical_loss": 3.3817191424603648, "tokens_seen": 2647392256 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.0011533270590007305, "objective/train/docs_used": 961712, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4470112323760986, "objective/train/original_loss": 1.4470112323760986, "objective/train/theoretical_loss": 3.3816929613706757, "objective/train/tokens_used": 2668114400, "objective/train/value_avg": -0.007129669189453125, "objective/train/value_loss": 0.00019181121024303138, "objective/train/value_max": -4.267692565917969e-05, "objective/train/value_min": -0.626953125, "objective/train/value_reward_corr": 0.645322537903916, "objective/train/value_std": 0.01311492919921875, "objective/train/weight_avg": 1.0012390613555908, "objective/train/weighted_lm_loss": 1.4489718675613403, "objective/train/weights_max": 1.632698893547058, "objective/train/weights_min": 0.3682536482810974, "theoretical_loss": 3.3816929613706757, "tokens_seen": 2647654400 }, { "epoch": 0.8, "learning_rate": 0.0001996469266570374, "loss": 0.0637, "theoretical_loss": 3.3816929613706757, "tokens_seen": 2647654400 }, { "epoch": 0.8, "learning_rate": 0.000199566682715455, "loss": 0.0629, "theoretical_loss": 3.381666783598776, "tokens_seen": 2647916544 }, { "epoch": 0.8, "learning_rate": 0.00019948643877387256, "loss": 0.0628, "theoretical_loss": 3.381640609143917, "tokens_seen": 2648178688 }, { "epoch": 0.8, "learning_rate": 0.00019940619483229017, "loss": 0.0615, "theoretical_loss": 3.3816144380053497, "tokens_seen": 2648440832 }, { "epoch": 0.8, "learning_rate": 0.00019932595089070776, "loss": 0.0646, "theoretical_loss": 3.3815882701823257, "tokens_seen": 2648702976 }, { "epoch": 0.8, "learning_rate": 0.00019924570694912535, "loss": 0.0665, "theoretical_loss": 3.3815621056740968, "tokens_seen": 2648965120 }, { "epoch": 0.8, "learning_rate": 0.00019916546300754293, "loss": 0.0625, "theoretical_loss": 3.381535944479916, "tokens_seen": 2649227264 }, { "epoch": 0.8, "learning_rate": 0.00019908521906596052, "loss": 0.0666, "theoretical_loss": 3.3815097865990342, "tokens_seen": 2649489408 }, { "epoch": 0.8, "learning_rate": 0.00019900497512437813, "loss": 0.0639, "theoretical_loss": 3.3814836320307053, "tokens_seen": 2649751552 }, { "epoch": 0.8, "learning_rate": 0.0001989247311827957, "loss": 0.0656, "theoretical_loss": 3.381457480774182, "tokens_seen": 2650013696 }, { "epoch": 0.8, "learning_rate": 0.0001988444872412133, "loss": 0.0635, "theoretical_loss": 3.3814313328287167, "tokens_seen": 2650275840 }, { "epoch": 0.8, "learning_rate": 0.00019876424329963089, "loss": 0.0609, "theoretical_loss": 3.381405188193563, "tokens_seen": 2650537984 }, { "epoch": 0.8, "learning_rate": 0.00019868399935804847, "loss": 0.0651, "theoretical_loss": 3.381379046867975, "tokens_seen": 2650800128 }, { "epoch": 0.8, "learning_rate": 0.00019860375541646606, "loss": 0.0636, "theoretical_loss": 3.381352908851206, "tokens_seen": 2651062272 }, { "epoch": 0.8, "learning_rate": 0.00019852351147488364, "loss": 0.0616, "theoretical_loss": 3.38132677414251, "tokens_seen": 2651324416 }, { "epoch": 0.8, "learning_rate": 0.00019844326753330126, "loss": 0.0677, "theoretical_loss": 3.381300642741142, "tokens_seen": 2651586560 }, { "epoch": 0.8, "learning_rate": 0.00019836302359171881, "loss": 0.0642, "theoretical_loss": 3.3812745146463556, "tokens_seen": 2651848704 }, { "epoch": 0.8, "learning_rate": 0.00019828277965013643, "loss": 0.0634, "theoretical_loss": 3.381248389857406, "tokens_seen": 2652110848 }, { "epoch": 0.8, "learning_rate": 0.000198202535708554, "loss": 0.0644, "theoretical_loss": 3.381222268373549, "tokens_seen": 2652372992 }, { "epoch": 0.8, "learning_rate": 0.0001981222917669716, "loss": 0.0654, "theoretical_loss": 3.3811961501940386, "tokens_seen": 2652635136 }, { "epoch": 0.8, "learning_rate": 0.00019804204782538918, "loss": 0.0631, "theoretical_loss": 3.3811700353181307, "tokens_seen": 2652897280 }, { "epoch": 0.8, "learning_rate": 0.00019796180388380677, "loss": 0.0603, "theoretical_loss": 3.3811439237450815, "tokens_seen": 2653159424 }, { "epoch": 0.8, "learning_rate": 0.00019788155994222438, "loss": 0.0623, "theoretical_loss": 3.381117815474147, "tokens_seen": 2653421568 }, { "epoch": 0.8, "learning_rate": 0.00019780131600064197, "loss": 0.064, "theoretical_loss": 3.381091710504583, "tokens_seen": 2653683712 }, { "epoch": 0.8, "learning_rate": 0.00019772107205905952, "loss": 0.0627, "theoretical_loss": 3.381065608835646, "tokens_seen": 2653945856 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.00042152591049671173, "objective/train/docs_used": 964093, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.1780507564544678, "objective/train/original_loss": 1.1780505180358887, "objective/train/theoretical_loss": 3.381039510466593, "objective/train/tokens_used": 2674668000, "objective/train/value_avg": -0.01111602783203125, "objective/train/value_loss": 0.0005099286790937185, "objective/train/value_max": -4.57763671875e-05, "objective/train/value_min": -0.9345703125, "objective/train/value_reward_corr": 0.7172054336346624, "objective/train/value_std": 0.0220489501953125, "objective/train/weight_avg": 1.0006495714187622, "objective/train/weighted_lm_loss": 1.1777839660644531, "objective/train/weights_max": 1.7102184295654297, "objective/train/weights_min": 0.3874923884868622, "theoretical_loss": 3.381039510466593, "tokens_seen": 2654208000 }, { "epoch": 0.8, "learning_rate": 0.00019764082811747714, "loss": 0.0643, "theoretical_loss": 3.381039510466593, "tokens_seen": 2654208000 }, { "epoch": 0.8, "learning_rate": 0.00019756058417589472, "loss": 0.0641, "theoretical_loss": 3.3810134153966804, "tokens_seen": 2654470144 }, { "epoch": 0.8, "learning_rate": 0.0001974803402343123, "loss": 0.0612, "theoretical_loss": 3.3809873236251664, "tokens_seen": 2654732288 }, { "epoch": 0.8, "learning_rate": 0.0001974000962927299, "loss": 0.0655, "theoretical_loss": 3.380961235151308, "tokens_seen": 2654994432 }, { "epoch": 0.8, "learning_rate": 0.0001973198523511475, "loss": 0.0622, "theoretical_loss": 3.3809351499743623, "tokens_seen": 2655256576 }, { "epoch": 0.8, "learning_rate": 0.0001972396084095651, "loss": 0.0647, "theoretical_loss": 3.3809090680935876, "tokens_seen": 2655518720 }, { "epoch": 0.8, "learning_rate": 0.00019715936446798265, "loss": 0.0657, "theoretical_loss": 3.3808829895082426, "tokens_seen": 2655780864 }, { "epoch": 0.8, "learning_rate": 0.00019707912052640026, "loss": 0.0658, "theoretical_loss": 3.380856914217585, "tokens_seen": 2656043008 }, { "epoch": 0.8, "learning_rate": 0.00019699887658481785, "loss": 0.0663, "theoretical_loss": 3.380830842220874, "tokens_seen": 2656305152 }, { "epoch": 0.81, "learning_rate": 0.00019691863264323546, "loss": 0.0643, "theoretical_loss": 3.380804773517368, "tokens_seen": 2656567296 }, { "epoch": 0.81, "learning_rate": 0.00019683838870165302, "loss": 0.0671, "theoretical_loss": 3.380778708106326, "tokens_seen": 2656829440 }, { "epoch": 0.81, "learning_rate": 0.0001967581447600706, "loss": 0.0642, "theoretical_loss": 3.380752645987008, "tokens_seen": 2657091584 }, { "epoch": 0.81, "learning_rate": 0.00019667790081848822, "loss": 0.0674, "theoretical_loss": 3.3807265871586734, "tokens_seen": 2657353728 }, { "epoch": 0.81, "learning_rate": 0.0001965976568769058, "loss": 0.064, "theoretical_loss": 3.380700531620582, "tokens_seen": 2657615872 }, { "epoch": 0.81, "learning_rate": 0.0001965174129353234, "loss": 0.0622, "theoretical_loss": 3.380674479371993, "tokens_seen": 2657878016 }, { "epoch": 0.81, "learning_rate": 0.00019643716899374097, "loss": 0.065, "theoretical_loss": 3.3806484304121684, "tokens_seen": 2658140160 }, { "epoch": 0.81, "learning_rate": 0.0001963569250521586, "loss": 0.0659, "theoretical_loss": 3.3806223847403674, "tokens_seen": 2658402304 }, { "epoch": 0.81, "learning_rate": 0.00019627668111057615, "loss": 0.0657, "theoretical_loss": 3.380596342355852, "tokens_seen": 2658664448 }, { "epoch": 0.81, "learning_rate": 0.00019619643716899373, "loss": 0.0674, "theoretical_loss": 3.3805703032578815, "tokens_seen": 2658926592 }, { "epoch": 0.81, "learning_rate": 0.00019611619322741134, "loss": 0.0641, "theoretical_loss": 3.380544267445719, "tokens_seen": 2659188736 }, { "epoch": 0.81, "learning_rate": 0.00019603594928582893, "loss": 0.063, "theoretical_loss": 3.380518234918625, "tokens_seen": 2659450880 }, { "epoch": 0.81, "learning_rate": 0.00019595570534424651, "loss": 0.0664, "theoretical_loss": 3.3804922056758615, "tokens_seen": 2659713024 }, { "epoch": 0.81, "learning_rate": 0.0001958754614026641, "loss": 0.0629, "theoretical_loss": 3.38046617971669, "tokens_seen": 2659975168 }, { "epoch": 0.81, "learning_rate": 0.00019579521746108169, "loss": 0.0651, "theoretical_loss": 3.3804401570403737, "tokens_seen": 2660237312 }, { "epoch": 0.81, "learning_rate": 0.0001957149735194993, "loss": 0.0658, "theoretical_loss": 3.3804141376461745, "tokens_seen": 2660499456 }, { "epoch": 0.81, "objective/train/advantage_avg": 1.0176291652896907e-05, "objective/train/docs_used": 966512, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2832056283950806, "objective/train/original_loss": 1.283205509185791, "objective/train/theoretical_loss": 3.3803881215333553, "objective/train/tokens_used": 2681221600, "objective/train/value_avg": -0.0079803466796875, "objective/train/value_loss": 0.00024036792456172407, "objective/train/value_max": -4.470348358154297e-05, "objective/train/value_min": -0.86083984375, "objective/train/value_reward_corr": 0.6866655599257911, "objective/train/value_std": 0.01390838623046875, "objective/train/weight_avg": 1.0001158714294434, "objective/train/weighted_lm_loss": 1.2830950021743774, "objective/train/weights_max": 1.3873459100723267, "objective/train/weights_min": 0.3687421381473541, "theoretical_loss": 3.3803881215333553, "tokens_seen": 2660761600 }, { "epoch": 0.81, "learning_rate": 0.00019563472957791686, "loss": 0.0638, "theoretical_loss": 3.3803881215333553, "tokens_seen": 2660761600 }, { "epoch": 0.81, "learning_rate": 0.00019555448563633447, "loss": 0.0635, "theoretical_loss": 3.3803621087011786, "tokens_seen": 2661023744 }, { "epoch": 0.81, "learning_rate": 0.00019547424169475205, "loss": 0.0662, "theoretical_loss": 3.3803360991489084, "tokens_seen": 2661285888 }, { "epoch": 0.81, "learning_rate": 0.00019539399775316964, "loss": 0.0668, "theoretical_loss": 3.3803100928758076, "tokens_seen": 2661548032 }, { "epoch": 0.81, "learning_rate": 0.00019531375381158723, "loss": 0.0651, "theoretical_loss": 3.3802840898811395, "tokens_seen": 2661810176 }, { "epoch": 0.81, "learning_rate": 0.0001952335098700048, "loss": 0.0667, "theoretical_loss": 3.380258090164169, "tokens_seen": 2662072320 }, { "epoch": 0.81, "learning_rate": 0.00019515326592842242, "loss": 0.0666, "theoretical_loss": 3.380232093724159, "tokens_seen": 2662334464 }, { "epoch": 0.81, "learning_rate": 0.00019507302198683998, "loss": 0.0648, "theoretical_loss": 3.380206100560375, "tokens_seen": 2662596608 }, { "epoch": 0.81, "learning_rate": 0.0001949927780452576, "loss": 0.0686, "theoretical_loss": 3.380180110672081, "tokens_seen": 2662858752 }, { "epoch": 0.81, "learning_rate": 0.00019491253410367518, "loss": 0.0653, "theoretical_loss": 3.3801541240585418, "tokens_seen": 2663120896 }, { "epoch": 0.81, "learning_rate": 0.00019483229016209277, "loss": 0.0662, "theoretical_loss": 3.380128140719023, "tokens_seen": 2663383040 }, { "epoch": 0.81, "learning_rate": 0.00019475204622051035, "loss": 0.063, "theoretical_loss": 3.3801021606527897, "tokens_seen": 2663645184 }, { "epoch": 0.81, "learning_rate": 0.00019467180227892794, "loss": 0.0658, "theoretical_loss": 3.380076183859107, "tokens_seen": 2663907328 }, { "epoch": 0.81, "learning_rate": 0.00019459155833734555, "loss": 0.0654, "theoretical_loss": 3.380050210337241, "tokens_seen": 2664169472 }, { "epoch": 0.81, "learning_rate": 0.0001945113143957631, "loss": 0.0641, "theoretical_loss": 3.380024240086458, "tokens_seen": 2664431616 }, { "epoch": 0.81, "learning_rate": 0.00019443107045418072, "loss": 0.0629, "theoretical_loss": 3.379998273106024, "tokens_seen": 2664693760 }, { "epoch": 0.81, "learning_rate": 0.0001943508265125983, "loss": 0.0661, "theoretical_loss": 3.379972309395206, "tokens_seen": 2664955904 }, { "epoch": 0.81, "learning_rate": 0.0001942705825710159, "loss": 0.0646, "theoretical_loss": 3.3799463489532697, "tokens_seen": 2665218048 }, { "epoch": 0.81, "learning_rate": 0.00019419033862943348, "loss": 0.0635, "theoretical_loss": 3.379920391779483, "tokens_seen": 2665480192 }, { "epoch": 0.81, "learning_rate": 0.00019411009468785106, "loss": 0.0638, "theoretical_loss": 3.379894437873113, "tokens_seen": 2665742336 }, { "epoch": 0.81, "learning_rate": 0.00019402985074626867, "loss": 0.0649, "theoretical_loss": 3.3798684872334266, "tokens_seen": 2666004480 }, { "epoch": 0.81, "learning_rate": 0.00019394960680468626, "loss": 0.0661, "theoretical_loss": 3.379842539859692, "tokens_seen": 2666266624 }, { "epoch": 0.81, "learning_rate": 0.00019386936286310382, "loss": 0.0621, "theoretical_loss": 3.379816595751177, "tokens_seen": 2666528768 }, { "epoch": 0.81, "learning_rate": 0.00019378911892152143, "loss": 0.064, "theoretical_loss": 3.3797906549071506, "tokens_seen": 2666790912 }, { "epoch": 0.81, "learning_rate": 0.00019370887497993902, "loss": 0.0607, "theoretical_loss": 3.3797647173268794, "tokens_seen": 2667053056 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.0007202328997664154, "objective/train/docs_used": 968905, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2583950757980347, "objective/train/original_loss": 1.2583949565887451, "objective/train/theoretical_loss": 3.3797387830096337, "objective/train/tokens_used": 2687775200, "objective/train/value_avg": -0.00569915771484375, "objective/train/value_loss": 0.0001516796910436824, "objective/train/value_max": -2.9981136322021484e-05, "objective/train/value_min": -0.6875, "objective/train/value_reward_corr": 0.6477400895991619, "objective/train/value_std": 0.01117706298828125, "objective/train/weight_avg": 1.000789761543274, "objective/train/weighted_lm_loss": 1.258969783782959, "objective/train/weights_max": 1.2237309217453003, "objective/train/weights_min": 0.38235902786254883, "theoretical_loss": 3.3797387830096337, "tokens_seen": 2667315200 }, { "epoch": 0.81, "learning_rate": 0.0001936286310383566, "loss": 0.0653, "theoretical_loss": 3.3797387830096337, "tokens_seen": 2667315200 }, { "epoch": 0.81, "learning_rate": 0.0001935483870967742, "loss": 0.0651, "theoretical_loss": 3.379712851954681, "tokens_seen": 2667577344 }, { "epoch": 0.81, "learning_rate": 0.0001934681431551918, "loss": 0.0643, "theoretical_loss": 3.3796869241612915, "tokens_seen": 2667839488 }, { "epoch": 0.81, "learning_rate": 0.00019338789921360939, "loss": 0.065, "theoretical_loss": 3.379660999628734, "tokens_seen": 2668101632 }, { "epoch": 0.81, "learning_rate": 0.00019330765527202694, "loss": 0.0617, "theoretical_loss": 3.3796350783562783, "tokens_seen": 2668363776 }, { "epoch": 0.81, "learning_rate": 0.00019322741133044456, "loss": 0.0636, "theoretical_loss": 3.379609160343194, "tokens_seen": 2668625920 }, { "epoch": 0.81, "learning_rate": 0.00019314716738886214, "loss": 0.0673, "theoretical_loss": 3.3795832455887513, "tokens_seen": 2668888064 }, { "epoch": 0.81, "learning_rate": 0.00019306692344727976, "loss": 0.0629, "theoretical_loss": 3.3795573340922207, "tokens_seen": 2669150208 }, { "epoch": 0.81, "learning_rate": 0.00019298667950569731, "loss": 0.0638, "theoretical_loss": 3.379531425852872, "tokens_seen": 2669412352 }, { "epoch": 0.81, "learning_rate": 0.00019290643556411493, "loss": 0.0644, "theoretical_loss": 3.379505520869977, "tokens_seen": 2669674496 }, { "epoch": 0.81, "learning_rate": 0.0001928261916225325, "loss": 0.0628, "theoretical_loss": 3.379479619142806, "tokens_seen": 2669936640 }, { "epoch": 0.81, "learning_rate": 0.00019274594768095007, "loss": 0.063, "theoretical_loss": 3.3794537206706297, "tokens_seen": 2670198784 }, { "epoch": 0.81, "learning_rate": 0.00019266570373936768, "loss": 0.0628, "theoretical_loss": 3.379427825452721, "tokens_seen": 2670460928 }, { "epoch": 0.81, "learning_rate": 0.00019258545979778527, "loss": 0.0661, "theoretical_loss": 3.3794019334883503, "tokens_seen": 2670723072 }, { "epoch": 0.81, "learning_rate": 0.00019250521585620288, "loss": 0.0646, "theoretical_loss": 3.3793760447767904, "tokens_seen": 2670985216 }, { "epoch": 0.81, "learning_rate": 0.00019242497191462044, "loss": 0.0659, "theoretical_loss": 3.3793501593173128, "tokens_seen": 2671247360 }, { "epoch": 0.81, "learning_rate": 0.00019234472797303802, "loss": 0.0616, "theoretical_loss": 3.3793242771091903, "tokens_seen": 2671509504 }, { "epoch": 0.81, "learning_rate": 0.00019226448403145564, "loss": 0.0632, "theoretical_loss": 3.3792983981516955, "tokens_seen": 2671771648 }, { "epoch": 0.81, "learning_rate": 0.00019218424008987322, "loss": 0.0653, "theoretical_loss": 3.379272522444101, "tokens_seen": 2672033792 }, { "epoch": 0.81, "learning_rate": 0.0001921039961482908, "loss": 0.0646, "theoretical_loss": 3.3792466499856806, "tokens_seen": 2672295936 }, { "epoch": 0.81, "learning_rate": 0.0001920237522067084, "loss": 0.0636, "theoretical_loss": 3.3792207807757064, "tokens_seen": 2672558080 }, { "epoch": 0.81, "learning_rate": 0.000191943508265126, "loss": 0.0633, "theoretical_loss": 3.379194914813453, "tokens_seen": 2672820224 }, { "epoch": 0.81, "learning_rate": 0.00019186326432354356, "loss": 0.0642, "theoretical_loss": 3.379169052098194, "tokens_seen": 2673082368 }, { "epoch": 0.81, "learning_rate": 0.00019178302038196115, "loss": 0.0665, "theoretical_loss": 3.3791431926292033, "tokens_seen": 2673344512 }, { "epoch": 0.81, "learning_rate": 0.00019170277644037876, "loss": 0.0638, "theoretical_loss": 3.3791173364057547, "tokens_seen": 2673606656 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.0004674495430663228, "objective/train/docs_used": 971344, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4315048456192017, "objective/train/original_loss": 1.4315046072006226, "objective/train/theoretical_loss": 3.3790914834271235, "objective/train/tokens_used": 2694328800, "objective/train/value_avg": -0.006275177001953125, "objective/train/value_loss": 0.00015117165457922965, "objective/train/value_max": -1.6450881958007812e-05, "objective/train/value_min": -0.2232666015625, "objective/train/value_reward_corr": 0.6798207075826601, "objective/train/value_std": 0.01152801513671875, "objective/train/weight_avg": 1.0005346536636353, "objective/train/weighted_lm_loss": 1.4325766563415527, "objective/train/weights_max": 1.1521174907684326, "objective/train/weights_min": 0.3680993318557739, "theoretical_loss": 3.3790914834271235, "tokens_seen": 2673868800 }, { "epoch": 0.81, "learning_rate": 0.00019162253249879635, "loss": 0.0642, "theoretical_loss": 3.3790914834271235, "tokens_seen": 2673868800 }, { "epoch": 0.81, "learning_rate": 0.00019154228855721393, "loss": 0.0659, "theoretical_loss": 3.3790656336925844, "tokens_seen": 2674130944 }, { "epoch": 0.81, "learning_rate": 0.00019146204461563152, "loss": 0.0628, "theoretical_loss": 3.3790397872014117, "tokens_seen": 2674393088 }, { "epoch": 0.81, "learning_rate": 0.0001913818006740491, "loss": 0.0624, "theoretical_loss": 3.379013943952881, "tokens_seen": 2674655232 }, { "epoch": 0.81, "learning_rate": 0.00019130155673246672, "loss": 0.0665, "theoretical_loss": 3.3789881039462677, "tokens_seen": 2674917376 }, { "epoch": 0.81, "learning_rate": 0.00019122131279088428, "loss": 0.0644, "theoretical_loss": 3.3789622671808477, "tokens_seen": 2675179520 }, { "epoch": 0.81, "learning_rate": 0.0001911410688493019, "loss": 0.0644, "theoretical_loss": 3.3789364336558965, "tokens_seen": 2675441664 }, { "epoch": 0.81, "learning_rate": 0.00019106082490771947, "loss": 0.0647, "theoretical_loss": 3.3789106033706906, "tokens_seen": 2675703808 }, { "epoch": 0.81, "learning_rate": 0.0001909805809661371, "loss": 0.0617, "theoretical_loss": 3.378884776324506, "tokens_seen": 2675965952 }, { "epoch": 0.81, "learning_rate": 0.00019090033702455465, "loss": 0.0636, "theoretical_loss": 3.3788589525166195, "tokens_seen": 2676228096 }, { "epoch": 0.81, "learning_rate": 0.00019082009308297223, "loss": 0.067, "theoretical_loss": 3.378833131946308, "tokens_seen": 2676490240 }, { "epoch": 0.81, "learning_rate": 0.00019073984914138984, "loss": 0.0655, "theoretical_loss": 3.378807314612848, "tokens_seen": 2676752384 }, { "epoch": 0.81, "learning_rate": 0.0001906596051998074, "loss": 0.0678, "theoretical_loss": 3.378781500515518, "tokens_seen": 2677014528 }, { "epoch": 0.81, "learning_rate": 0.00019057936125822501, "loss": 0.0661, "theoretical_loss": 3.3787556896535937, "tokens_seen": 2677276672 }, { "epoch": 0.81, "learning_rate": 0.0001904991173166426, "loss": 0.0641, "theoretical_loss": 3.378729882026355, "tokens_seen": 2677538816 }, { "epoch": 0.81, "learning_rate": 0.00019041887337506019, "loss": 0.0649, "theoretical_loss": 3.3787040776330786, "tokens_seen": 2677800960 }, { "epoch": 0.81, "learning_rate": 0.00019033862943347777, "loss": 0.0643, "theoretical_loss": 3.3786782764730425, "tokens_seen": 2678063104 }, { "epoch": 0.81, "learning_rate": 0.00019025838549189536, "loss": 0.065, "theoretical_loss": 3.3786524785455256, "tokens_seen": 2678325248 }, { "epoch": 0.81, "learning_rate": 0.00019017814155031297, "loss": 0.0661, "theoretical_loss": 3.378626683849807, "tokens_seen": 2678587392 }, { "epoch": 0.81, "learning_rate": 0.00019009789760873055, "loss": 0.0682, "theoretical_loss": 3.378600892385165, "tokens_seen": 2678849536 }, { "epoch": 0.81, "learning_rate": 0.00019001765366714814, "loss": 0.0671, "theoretical_loss": 3.378575104150879, "tokens_seen": 2679111680 }, { "epoch": 0.81, "learning_rate": 0.00018993740972556573, "loss": 0.0641, "theoretical_loss": 3.378549319146228, "tokens_seen": 2679373824 }, { "epoch": 0.81, "learning_rate": 0.0001898571657839833, "loss": 0.0666, "theoretical_loss": 3.3785235373704925, "tokens_seen": 2679635968 }, { "epoch": 0.81, "learning_rate": 0.0001897769218424009, "loss": 0.0639, "theoretical_loss": 3.3784977588229514, "tokens_seen": 2679898112 }, { "epoch": 0.81, "learning_rate": 0.00018969667790081848, "loss": 0.0651, "theoretical_loss": 3.378471983502885, "tokens_seen": 2680160256 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.00010337762796552852, "objective/train/docs_used": 973756, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3529930114746094, "objective/train/original_loss": 1.3529927730560303, "objective/train/theoretical_loss": 3.378446211409574, "objective/train/tokens_used": 2700882400, "objective/train/value_avg": -0.01512908935546875, "objective/train/value_loss": 0.0003093662380706519, "objective/train/value_max": -2.5093555450439453e-05, "objective/train/value_min": -0.56396484375, "objective/train/value_reward_corr": 0.94607449504066, "objective/train/value_std": 0.051483154296875, "objective/train/weight_avg": 1.0002483129501343, "objective/train/weighted_lm_loss": 1.352254033088684, "objective/train/weights_max": 1.2205979824066162, "objective/train/weights_min": 0.3683340847492218, "theoretical_loss": 3.378446211409574, "tokens_seen": 2680422400 }, { "epoch": 0.81, "learning_rate": 0.0001896164339592361, "loss": 0.0654, "theoretical_loss": 3.378446211409574, "tokens_seen": 2680422400 }, { "epoch": 0.81, "learning_rate": 0.00018953619001765368, "loss": 0.0634, "theoretical_loss": 3.378420442542299, "tokens_seen": 2680684544 }, { "epoch": 0.81, "learning_rate": 0.00018945594607607124, "loss": 0.0638, "theoretical_loss": 3.37839467690034, "tokens_seen": 2680946688 }, { "epoch": 0.81, "learning_rate": 0.00018937570213448885, "loss": 0.0679, "theoretical_loss": 3.3783689144829783, "tokens_seen": 2681208832 }, { "epoch": 0.81, "learning_rate": 0.00018929545819290644, "loss": 0.0644, "theoretical_loss": 3.3783431552894956, "tokens_seen": 2681470976 }, { "epoch": 0.81, "learning_rate": 0.00018921521425132405, "loss": 0.065, "theoretical_loss": 3.378317399319173, "tokens_seen": 2681733120 }, { "epoch": 0.81, "learning_rate": 0.0001891349703097416, "loss": 0.063, "theoretical_loss": 3.378291646571292, "tokens_seen": 2681995264 }, { "epoch": 0.81, "learning_rate": 0.00018905472636815922, "loss": 0.0667, "theoretical_loss": 3.378265897045135, "tokens_seen": 2682257408 }, { "epoch": 0.81, "learning_rate": 0.0001889744824265768, "loss": 0.0656, "theoretical_loss": 3.3782401507399835, "tokens_seen": 2682519552 }, { "epoch": 0.81, "learning_rate": 0.00018889423848499436, "loss": 0.0636, "theoretical_loss": 3.3782144076551206, "tokens_seen": 2682781696 }, { "epoch": 0.81, "learning_rate": 0.00018881399454341198, "loss": 0.0653, "theoretical_loss": 3.3781886677898285, "tokens_seen": 2683043840 }, { "epoch": 0.81, "learning_rate": 0.00018873375060182956, "loss": 0.065, "theoretical_loss": 3.3781629311433896, "tokens_seen": 2683305984 }, { "epoch": 0.81, "learning_rate": 0.00018865350666024717, "loss": 0.0639, "theoretical_loss": 3.378137197715088, "tokens_seen": 2683568128 }, { "epoch": 0.81, "learning_rate": 0.00018857326271866473, "loss": 0.0645, "theoretical_loss": 3.3781114675042065, "tokens_seen": 2683830272 }, { "epoch": 0.81, "learning_rate": 0.00018849301877708232, "loss": 0.0671, "theoretical_loss": 3.3780857405100284, "tokens_seen": 2684092416 }, { "epoch": 0.81, "learning_rate": 0.00018841277483549993, "loss": 0.0664, "theoretical_loss": 3.3780600167318378, "tokens_seen": 2684354560 }, { "epoch": 0.81, "learning_rate": 0.00018833253089391752, "loss": 0.0646, "theoretical_loss": 3.3780342961689183, "tokens_seen": 2684616704 }, { "epoch": 0.81, "learning_rate": 0.0001882522869523351, "loss": 0.0648, "theoretical_loss": 3.3780085788205545, "tokens_seen": 2684878848 }, { "epoch": 0.81, "learning_rate": 0.0001881720430107527, "loss": 0.0658, "theoretical_loss": 3.3779828646860306, "tokens_seen": 2685140992 }, { "epoch": 0.81, "learning_rate": 0.0001880917990691703, "loss": 0.0651, "theoretical_loss": 3.377957153764631, "tokens_seen": 2685403136 }, { "epoch": 0.81, "learning_rate": 0.00018801155512758786, "loss": 0.0627, "theoretical_loss": 3.3779314460556416, "tokens_seen": 2685665280 }, { "epoch": 0.81, "learning_rate": 0.00018793131118600544, "loss": 0.0652, "theoretical_loss": 3.3779057415583464, "tokens_seen": 2685927424 }, { "epoch": 0.81, "learning_rate": 0.00018785106724442306, "loss": 0.0636, "theoretical_loss": 3.3778800402720313, "tokens_seen": 2686189568 }, { "epoch": 0.81, "learning_rate": 0.00018777082330284064, "loss": 0.0664, "theoretical_loss": 3.377854342195982, "tokens_seen": 2686451712 }, { "epoch": 0.81, "learning_rate": 0.00018769057936125823, "loss": 0.0633, "theoretical_loss": 3.3778286473294843, "tokens_seen": 2686713856 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.0004787496873177588, "objective/train/docs_used": 976171, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2660490274429321, "objective/train/original_loss": 1.2660491466522217, "objective/train/theoretical_loss": 3.377802955671824, "objective/train/tokens_used": 2707436000, "objective/train/value_avg": -0.0075531005859375, "objective/train/value_loss": 0.00026115079526789486, "objective/train/value_max": -3.427267074584961e-05, "objective/train/value_min": -0.9697265625, "objective/train/value_reward_corr": 0.7488763443348854, "objective/train/value_std": 0.01806640625, "objective/train/weight_avg": 1.0005987882614136, "objective/train/weighted_lm_loss": 1.2669463157653809, "objective/train/weights_max": 2.38258957862854, "objective/train/weights_min": 0.36950811743736267, "theoretical_loss": 3.377802955671824, "tokens_seen": 2686976000 }, { "epoch": 0.81, "learning_rate": 0.00018761033541967581, "loss": 0.0651, "theoretical_loss": 3.377802955671824, "tokens_seen": 2686976000 }, { "epoch": 0.81, "learning_rate": 0.0001875300914780934, "loss": 0.064, "theoretical_loss": 3.377777267222287, "tokens_seen": 2687238144 }, { "epoch": 0.81, "learning_rate": 0.000187449847536511, "loss": 0.0642, "theoretical_loss": 3.377751581980161, "tokens_seen": 2687500288 }, { "epoch": 0.81, "learning_rate": 0.00018736960359492857, "loss": 0.0645, "theoretical_loss": 3.3777258999447315, "tokens_seen": 2687762432 }, { "epoch": 0.81, "learning_rate": 0.00018728935965334618, "loss": 0.0668, "theoretical_loss": 3.377700221115286, "tokens_seen": 2688024576 }, { "epoch": 0.81, "learning_rate": 0.00018720911571176377, "loss": 0.0647, "theoretical_loss": 3.3776745454911117, "tokens_seen": 2688286720 }, { "epoch": 0.81, "learning_rate": 0.00018712887177018135, "loss": 0.0662, "theoretical_loss": 3.377648873071496, "tokens_seen": 2688548864 }, { "epoch": 0.81, "learning_rate": 0.00018704862782859894, "loss": 0.0648, "theoretical_loss": 3.377623203855727, "tokens_seen": 2688811008 }, { "epoch": 0.81, "learning_rate": 0.00018696838388701652, "loss": 0.0663, "theoretical_loss": 3.3775975378430916, "tokens_seen": 2689073152 }, { "epoch": 0.81, "learning_rate": 0.00018688813994543414, "loss": 0.0628, "theoretical_loss": 3.3775718750328783, "tokens_seen": 2689335296 }, { "epoch": 0.82, "learning_rate": 0.0001868078960038517, "loss": 0.0659, "theoretical_loss": 3.3775462154243754, "tokens_seen": 2689597440 }, { "epoch": 0.82, "learning_rate": 0.0001867276520622693, "loss": 0.0649, "theoretical_loss": 3.3775205590168715, "tokens_seen": 2689859584 }, { "epoch": 0.82, "learning_rate": 0.0001866474081206869, "loss": 0.0644, "theoretical_loss": 3.377494905809656, "tokens_seen": 2690121728 }, { "epoch": 0.82, "learning_rate": 0.00018656716417910448, "loss": 0.0641, "theoretical_loss": 3.377469255802017, "tokens_seen": 2690383872 }, { "epoch": 0.82, "learning_rate": 0.00018648692023752206, "loss": 0.0647, "theoretical_loss": 3.3774436089932443, "tokens_seen": 2690646016 }, { "epoch": 0.82, "learning_rate": 0.00018640667629593965, "loss": 0.0641, "theoretical_loss": 3.3774179653826266, "tokens_seen": 2690908160 }, { "epoch": 0.82, "learning_rate": 0.00018632643235435726, "loss": 0.0644, "theoretical_loss": 3.3773923249694544, "tokens_seen": 2691170304 }, { "epoch": 0.82, "learning_rate": 0.00018624618841277485, "loss": 0.0628, "theoretical_loss": 3.3773666877530175, "tokens_seen": 2691432448 }, { "epoch": 0.82, "learning_rate": 0.00018616594447119243, "loss": 0.0635, "theoretical_loss": 3.377341053732606, "tokens_seen": 2691694592 }, { "epoch": 0.82, "learning_rate": 0.00018608570052961002, "loss": 0.0657, "theoretical_loss": 3.37731542290751, "tokens_seen": 2691956736 }, { "epoch": 0.82, "learning_rate": 0.0001860054565880276, "loss": 0.0667, "theoretical_loss": 3.37728979527702, "tokens_seen": 2692218880 }, { "epoch": 0.82, "learning_rate": 0.0001859252126464452, "loss": 0.0612, "theoretical_loss": 3.3772641708404274, "tokens_seen": 2692481024 }, { "epoch": 0.82, "learning_rate": 0.00018584496870486278, "loss": 0.0636, "theoretical_loss": 3.3772385495970227, "tokens_seen": 2692743168 }, { "epoch": 0.82, "learning_rate": 0.0001857647247632804, "loss": 0.0639, "theoretical_loss": 3.3772129315460977, "tokens_seen": 2693005312 }, { "epoch": 0.82, "learning_rate": 0.00018568448082169797, "loss": 0.0644, "theoretical_loss": 3.3771873166869435, "tokens_seen": 2693267456 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.0006705496925860643, "objective/train/docs_used": 978638, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3295810222625732, "objective/train/original_loss": 1.3295810222625732, "objective/train/theoretical_loss": 3.377161705018852, "objective/train/tokens_used": 2713989600, "objective/train/value_avg": -0.00616455078125, "objective/train/value_loss": 0.0002441635588183999, "objective/train/value_max": -1.806020736694336e-05, "objective/train/value_min": -0.8525390625, "objective/train/value_reward_corr": 0.7188453937477017, "objective/train/value_std": 0.0169830322265625, "objective/train/weight_avg": 1.0007843971252441, "objective/train/weighted_lm_loss": 1.33126962184906, "objective/train/weights_max": 1.6763864755630493, "objective/train/weights_min": 0.3707548677921295, "theoretical_loss": 3.377161705018852, "tokens_seen": 2693529600 }, { "epoch": 0.82, "learning_rate": 0.00018560423688011553, "loss": 0.0658, "theoretical_loss": 3.377161705018852, "tokens_seen": 2693529600 }, { "epoch": 0.82, "learning_rate": 0.00018552399293853315, "loss": 0.066, "theoretical_loss": 3.377136096541115, "tokens_seen": 2693791744 }, { "epoch": 0.82, "learning_rate": 0.00018544374899695073, "loss": 0.0653, "theoretical_loss": 3.377110491253024, "tokens_seen": 2694053888 }, { "epoch": 0.82, "learning_rate": 0.00018536350505536834, "loss": 0.0638, "theoretical_loss": 3.377084889153873, "tokens_seen": 2694316032 }, { "epoch": 0.82, "learning_rate": 0.0001852832611137859, "loss": 0.064, "theoretical_loss": 3.3770592902429537, "tokens_seen": 2694578176 }, { "epoch": 0.82, "learning_rate": 0.00018520301717220351, "loss": 0.0665, "theoretical_loss": 3.377033694519559, "tokens_seen": 2694840320 }, { "epoch": 0.82, "learning_rate": 0.0001851227732306211, "loss": 0.0639, "theoretical_loss": 3.377008101982982, "tokens_seen": 2695102464 }, { "epoch": 0.82, "learning_rate": 0.00018504252928903866, "loss": 0.0668, "theoretical_loss": 3.3769825126325164, "tokens_seen": 2695364608 }, { "epoch": 0.82, "learning_rate": 0.00018496228534745627, "loss": 0.0652, "theoretical_loss": 3.376956926467455, "tokens_seen": 2695626752 }, { "epoch": 0.82, "learning_rate": 0.00018488204140587386, "loss": 0.066, "theoretical_loss": 3.376931343487092, "tokens_seen": 2695888896 }, { "epoch": 0.82, "learning_rate": 0.00018480179746429147, "loss": 0.0645, "theoretical_loss": 3.3769057636907216, "tokens_seen": 2696151040 }, { "epoch": 0.82, "learning_rate": 0.00018472155352270903, "loss": 0.0644, "theoretical_loss": 3.3768801870776377, "tokens_seen": 2696413184 }, { "epoch": 0.82, "learning_rate": 0.0001846413095811266, "loss": 0.0639, "theoretical_loss": 3.3768546136471347, "tokens_seen": 2696675328 }, { "epoch": 0.82, "learning_rate": 0.00018456106563954423, "loss": 0.0675, "theoretical_loss": 3.376829043398507, "tokens_seen": 2696937472 }, { "epoch": 0.82, "learning_rate": 0.0001844808216979618, "loss": 0.064, "theoretical_loss": 3.37680347633105, "tokens_seen": 2697199616 }, { "epoch": 0.82, "learning_rate": 0.0001844005777563794, "loss": 0.0677, "theoretical_loss": 3.3767779124440587, "tokens_seen": 2697461760 }, { "epoch": 0.82, "learning_rate": 0.00018432033381479698, "loss": 0.0664, "theoretical_loss": 3.3767523517368288, "tokens_seen": 2697723904 }, { "epoch": 0.82, "learning_rate": 0.0001842400898732146, "loss": 0.0639, "theoretical_loss": 3.376726794208655, "tokens_seen": 2697986048 }, { "epoch": 0.82, "learning_rate": 0.00018415984593163215, "loss": 0.0621, "theoretical_loss": 3.3767012398588334, "tokens_seen": 2698248192 }, { "epoch": 0.82, "learning_rate": 0.00018407960199004974, "loss": 0.0636, "theoretical_loss": 3.3766756886866602, "tokens_seen": 2698510336 }, { "epoch": 0.82, "learning_rate": 0.00018399935804846735, "loss": 0.0642, "theoretical_loss": 3.3766501406914315, "tokens_seen": 2698772480 }, { "epoch": 0.82, "learning_rate": 0.00018391911410688494, "loss": 0.0664, "theoretical_loss": 3.376624595872444, "tokens_seen": 2699034624 }, { "epoch": 0.82, "learning_rate": 0.00018383887016530252, "loss": 0.0625, "theoretical_loss": 3.376599054228994, "tokens_seen": 2699296768 }, { "epoch": 0.82, "learning_rate": 0.0001837586262237201, "loss": 0.0627, "theoretical_loss": 3.3765735157603785, "tokens_seen": 2699558912 }, { "epoch": 0.82, "learning_rate": 0.00018367838228213772, "loss": 0.0667, "theoretical_loss": 3.376547980465895, "tokens_seen": 2699821056 }, { "epoch": 0.82, "objective/train/advantage_avg": -0.0002805391850415617, "objective/train/docs_used": 981079, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3751705884933472, "objective/train/original_loss": 1.3751707077026367, "objective/train/theoretical_loss": 3.3765224483448404, "objective/train/tokens_used": 2720543200, "objective/train/value_avg": -0.0082244873046875, "objective/train/value_loss": 0.0004667190951295197, "objective/train/value_max": -2.5451183319091797e-05, "objective/train/value_min": -0.94189453125, "objective/train/value_reward_corr": 0.7052109388437202, "objective/train/value_std": 0.02203369140625, "objective/train/weight_avg": 0.9999330639839172, "objective/train/weighted_lm_loss": 1.374388575553894, "objective/train/weights_max": 2.1484720706939697, "objective/train/weights_min": 0.3696533143520355, "theoretical_loss": 3.3765224483448404, "tokens_seen": 2700083200 }, { "epoch": 0.82, "learning_rate": 0.0001835981383405553, "loss": 0.0656, "theoretical_loss": 3.3765224483448404, "tokens_seen": 2700083200 }, { "epoch": 0.82, "learning_rate": 0.00018351789439897286, "loss": 0.0629, "theoretical_loss": 3.3764969193965126, "tokens_seen": 2700345344 }, { "epoch": 0.82, "learning_rate": 0.00018343765045739048, "loss": 0.066, "theoretical_loss": 3.376471393620209, "tokens_seen": 2700607488 }, { "epoch": 0.82, "learning_rate": 0.00018335740651580806, "loss": 0.0653, "theoretical_loss": 3.3764458710152283, "tokens_seen": 2700869632 }, { "epoch": 0.82, "learning_rate": 0.00018327716257422565, "loss": 0.0645, "theoretical_loss": 3.376420351580868, "tokens_seen": 2701131776 }, { "epoch": 0.82, "learning_rate": 0.00018319691863264323, "loss": 0.0657, "theoretical_loss": 3.376394835316427, "tokens_seen": 2701393920 }, { "epoch": 0.82, "learning_rate": 0.00018311667469106082, "loss": 0.066, "theoretical_loss": 3.3763693222212035, "tokens_seen": 2701656064 }, { "epoch": 0.82, "learning_rate": 0.00018303643074947843, "loss": 0.0647, "theoretical_loss": 3.3763438122944973, "tokens_seen": 2701918208 }, { "epoch": 0.82, "learning_rate": 0.000182956186807896, "loss": 0.0636, "theoretical_loss": 3.3763183055356065, "tokens_seen": 2702180352 }, { "epoch": 0.82, "learning_rate": 0.0001828759428663136, "loss": 0.0634, "theoretical_loss": 3.3762928019438316, "tokens_seen": 2702442496 }, { "epoch": 0.82, "learning_rate": 0.0001827956989247312, "loss": 0.065, "theoretical_loss": 3.376267301518471, "tokens_seen": 2702704640 }, { "epoch": 0.82, "learning_rate": 0.0001827154549831488, "loss": 0.0658, "theoretical_loss": 3.3762418042588256, "tokens_seen": 2702966784 }, { "epoch": 0.82, "learning_rate": 0.00018263521104156636, "loss": 0.0666, "theoretical_loss": 3.3762163101641947, "tokens_seen": 2703228928 }, { "epoch": 0.82, "learning_rate": 0.00018255496709998394, "loss": 0.064, "theoretical_loss": 3.3761908192338788, "tokens_seen": 2703491072 }, { "epoch": 0.82, "learning_rate": 0.00018247472315840156, "loss": 0.0638, "theoretical_loss": 3.3761653314671785, "tokens_seen": 2703753216 }, { "epoch": 0.82, "learning_rate": 0.00018239447921681912, "loss": 0.0661, "theoretical_loss": 3.376139846863394, "tokens_seen": 2704015360 }, { "epoch": 0.82, "learning_rate": 0.00018231423527523673, "loss": 0.0674, "theoretical_loss": 3.3761143654218264, "tokens_seen": 2704277504 }, { "epoch": 0.82, "learning_rate": 0.00018223399133365431, "loss": 0.0637, "theoretical_loss": 3.3760888871417776, "tokens_seen": 2704539648 }, { "epoch": 0.82, "learning_rate": 0.0001821537473920719, "loss": 0.0611, "theoretical_loss": 3.376063412022548, "tokens_seen": 2704801792 }, { "epoch": 0.82, "learning_rate": 0.00018207350345048948, "loss": 0.0653, "theoretical_loss": 3.3760379400634397, "tokens_seen": 2705063936 }, { "epoch": 0.82, "learning_rate": 0.00018199325950890707, "loss": 0.0649, "theoretical_loss": 3.3760124712637545, "tokens_seen": 2705326080 }, { "epoch": 0.82, "learning_rate": 0.00018191301556732468, "loss": 0.066, "theoretical_loss": 3.375987005622794, "tokens_seen": 2705588224 }, { "epoch": 0.82, "learning_rate": 0.00018183277162574227, "loss": 0.0671, "theoretical_loss": 3.3759615431398613, "tokens_seen": 2705850368 }, { "epoch": 0.82, "learning_rate": 0.00018175252768415985, "loss": 0.0642, "theoretical_loss": 3.3759360838142576, "tokens_seen": 2706112512 }, { "epoch": 0.82, "learning_rate": 0.00018167228374257744, "loss": 0.0632, "theoretical_loss": 3.375910627645287, "tokens_seen": 2706374656 }, { "epoch": 0.82, "objective/train/advantage_avg": -0.000171054300153628, "objective/train/docs_used": 983478, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2702794075012207, "objective/train/original_loss": 1.2702794075012207, "objective/train/theoretical_loss": 3.375885174632251, "objective/train/tokens_used": 2727096800, "objective/train/value_avg": -0.0064697265625, "objective/train/value_loss": 0.00020486005814746022, "objective/train/value_max": -2.2292137145996094e-05, "objective/train/value_min": -0.6494140625, "objective/train/value_reward_corr": 0.7068462014367565, "objective/train/value_std": 0.01386260986328125, "objective/train/weight_avg": 0.9999198913574219, "objective/train/weighted_lm_loss": 1.269945740699768, "objective/train/weights_max": 1.7154713869094849, "objective/train/weights_min": 0.23434561491012573, "theoretical_loss": 3.375885174632251, "tokens_seen": 2706636800 }, { "epoch": 0.82, "learning_rate": 0.00018159203980099502, "loss": 0.0658, "theoretical_loss": 3.375885174632251, "tokens_seen": 2706636800 }, { "epoch": 0.82, "learning_rate": 0.00018151179585941264, "loss": 0.0671, "theoretical_loss": 3.375859724774454, "tokens_seen": 2706898944 }, { "epoch": 0.82, "learning_rate": 0.0001814315519178302, "loss": 0.0663, "theoretical_loss": 3.375834278071199, "tokens_seen": 2707161088 }, { "epoch": 0.82, "learning_rate": 0.0001813513079762478, "loss": 0.0657, "theoretical_loss": 3.375808834521789, "tokens_seen": 2707423232 }, { "epoch": 0.82, "learning_rate": 0.0001812710640346654, "loss": 0.0644, "theoretical_loss": 3.375783394125529, "tokens_seen": 2707685376 }, { "epoch": 0.82, "learning_rate": 0.00018119082009308295, "loss": 0.0696, "theoretical_loss": 3.375757956881721, "tokens_seen": 2707947520 }, { "epoch": 0.82, "learning_rate": 0.00018111057615150057, "loss": 0.0715, "theoretical_loss": 3.3757325227896713, "tokens_seen": 2708209664 }, { "epoch": 0.82, "learning_rate": 0.00018103033220991815, "loss": 0.0688, "theoretical_loss": 3.3757070918486836, "tokens_seen": 2708471808 }, { "epoch": 0.82, "learning_rate": 0.00018095008826833576, "loss": 0.069, "theoretical_loss": 3.3756816640580625, "tokens_seen": 2708733952 }, { "epoch": 0.82, "learning_rate": 0.00018086984432675332, "loss": 0.0704, "theoretical_loss": 3.375656239417113, "tokens_seen": 2708996096 }, { "epoch": 0.82, "learning_rate": 0.00018078960038517093, "loss": 0.0658, "theoretical_loss": 3.37563081792514, "tokens_seen": 2709258240 }, { "epoch": 0.82, "learning_rate": 0.00018070935644358852, "loss": 0.0681, "theoretical_loss": 3.375605399581449, "tokens_seen": 2709520384 }, { "epoch": 0.82, "learning_rate": 0.0001806291125020061, "loss": 0.067, "theoretical_loss": 3.375579984385346, "tokens_seen": 2709782528 }, { "epoch": 0.82, "learning_rate": 0.0001805488685604237, "loss": 0.0641, "theoretical_loss": 3.375554572336136, "tokens_seen": 2710044672 }, { "epoch": 0.82, "learning_rate": 0.00018046862461884128, "loss": 0.0674, "theoretical_loss": 3.3755291634331255, "tokens_seen": 2710306816 }, { "epoch": 0.82, "learning_rate": 0.0001803883806772589, "loss": 0.0677, "theoretical_loss": 3.3755037576756206, "tokens_seen": 2710568960 }, { "epoch": 0.82, "learning_rate": 0.00018030813673567645, "loss": 0.0686, "theoretical_loss": 3.3754783550629277, "tokens_seen": 2710831104 }, { "epoch": 0.82, "learning_rate": 0.00018022789279409403, "loss": 0.0653, "theoretical_loss": 3.375452955594354, "tokens_seen": 2711093248 }, { "epoch": 0.82, "learning_rate": 0.00018014764885251165, "loss": 0.069, "theoretical_loss": 3.3754275592692053, "tokens_seen": 2711355392 }, { "epoch": 0.82, "learning_rate": 0.00018006740491092923, "loss": 0.0663, "theoretical_loss": 3.37540216608679, "tokens_seen": 2711617536 }, { "epoch": 0.82, "learning_rate": 0.00017998716096934682, "loss": 0.0644, "theoretical_loss": 3.3753767760464144, "tokens_seen": 2711879680 }, { "epoch": 0.82, "learning_rate": 0.0001799069170277644, "loss": 0.0679, "theoretical_loss": 3.3753513891473865, "tokens_seen": 2712141824 }, { "epoch": 0.82, "learning_rate": 0.00017982667308618201, "loss": 0.0647, "theoretical_loss": 3.375326005389014, "tokens_seen": 2712403968 }, { "epoch": 0.82, "learning_rate": 0.0001797464291445996, "loss": 0.0712, "theoretical_loss": 3.3753006247706048, "tokens_seen": 2712666112 }, { "epoch": 0.82, "learning_rate": 0.00017966618520301716, "loss": 0.0661, "theoretical_loss": 3.3752752472914676, "tokens_seen": 2712928256 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.00032114412169903517, "objective/train/docs_used": 985832, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3805224895477295, "objective/train/original_loss": 1.3805222511291504, "objective/train/theoretical_loss": 3.37524987295091, "objective/train/tokens_used": 2733650400, "objective/train/value_avg": -0.00806427001953125, "objective/train/value_loss": 0.0002658286248333752, "objective/train/value_max": -3.427267074584961e-05, "objective/train/value_min": -0.94384765625, "objective/train/value_reward_corr": 0.7371548868423696, "objective/train/value_std": 0.0176239013671875, "objective/train/weight_avg": 1.0004411935806274, "objective/train/weighted_lm_loss": 1.380434513092041, "objective/train/weights_max": 1.8222967386245728, "objective/train/weights_min": 0.3710845410823822, "theoretical_loss": 3.37524987295091, "tokens_seen": 2713190400 }, { "epoch": 0.82, "learning_rate": 0.00017958594126143477, "loss": 0.0704, "theoretical_loss": 3.37524987295091, "tokens_seen": 2713190400 }, { "epoch": 0.82, "learning_rate": 0.00017950569731985236, "loss": 0.0649, "theoretical_loss": 3.375224501748241, "tokens_seen": 2713452544 }, { "epoch": 0.82, "learning_rate": 0.00017942545337826994, "loss": 0.0666, "theoretical_loss": 3.3751991336827696, "tokens_seen": 2713714688 }, { "epoch": 0.82, "learning_rate": 0.00017934520943668753, "loss": 0.0657, "theoretical_loss": 3.375173768753805, "tokens_seen": 2713976832 }, { "epoch": 0.82, "learning_rate": 0.0001792649654951051, "loss": 0.0627, "theoretical_loss": 3.3751484069606565, "tokens_seen": 2714238976 }, { "epoch": 0.82, "learning_rate": 0.00017918472155352273, "loss": 0.068, "theoretical_loss": 3.375123048302633, "tokens_seen": 2714501120 }, { "epoch": 0.82, "learning_rate": 0.00017910447761194028, "loss": 0.0645, "theoretical_loss": 3.375097692779045, "tokens_seen": 2714763264 }, { "epoch": 0.82, "learning_rate": 0.0001790242336703579, "loss": 0.0657, "theoretical_loss": 3.3750723403892016, "tokens_seen": 2715025408 }, { "epoch": 0.82, "learning_rate": 0.00017894398972877548, "loss": 0.0634, "theoretical_loss": 3.375046991132414, "tokens_seen": 2715287552 }, { "epoch": 0.82, "learning_rate": 0.0001788637457871931, "loss": 0.068, "theoretical_loss": 3.375021645007992, "tokens_seen": 2715549696 }, { "epoch": 0.82, "learning_rate": 0.00017878350184561065, "loss": 0.0681, "theoretical_loss": 3.3749963020152465, "tokens_seen": 2715811840 }, { "epoch": 0.82, "learning_rate": 0.00017870325790402824, "loss": 0.0656, "theoretical_loss": 3.3749709621534882, "tokens_seen": 2716073984 }, { "epoch": 0.82, "learning_rate": 0.00017862301396244585, "loss": 0.0673, "theoretical_loss": 3.374945625422028, "tokens_seen": 2716336128 }, { "epoch": 0.82, "learning_rate": 0.0001785427700208634, "loss": 0.0654, "theoretical_loss": 3.374920291820177, "tokens_seen": 2716598272 }, { "epoch": 0.82, "learning_rate": 0.00017846252607928102, "loss": 0.0656, "theoretical_loss": 3.374894961347247, "tokens_seen": 2716860416 }, { "epoch": 0.82, "learning_rate": 0.0001783822821376986, "loss": 0.0711, "theoretical_loss": 3.37486963400255, "tokens_seen": 2717122560 }, { "epoch": 0.82, "learning_rate": 0.0001783020381961162, "loss": 0.072, "theoretical_loss": 3.3748443097853973, "tokens_seen": 2717384704 }, { "epoch": 0.82, "learning_rate": 0.00017822179425453378, "loss": 0.0672, "theoretical_loss": 3.374818988695101, "tokens_seen": 2717646848 }, { "epoch": 0.82, "learning_rate": 0.00017814155031295136, "loss": 0.0648, "theoretical_loss": 3.3747936707309747, "tokens_seen": 2717908992 }, { "epoch": 0.82, "learning_rate": 0.00017806130637136898, "loss": 0.0646, "theoretical_loss": 3.3747683558923294, "tokens_seen": 2718171136 }, { "epoch": 0.82, "learning_rate": 0.00017798106242978656, "loss": 0.0682, "theoretical_loss": 3.3747430441784787, "tokens_seen": 2718433280 }, { "epoch": 0.82, "learning_rate": 0.00017790081848820415, "loss": 0.0653, "theoretical_loss": 3.3747177355887352, "tokens_seen": 2718695424 }, { "epoch": 0.82, "learning_rate": 0.00017782057454662173, "loss": 0.0622, "theoretical_loss": 3.374692430122413, "tokens_seen": 2718957568 }, { "epoch": 0.82, "learning_rate": 0.00017774033060503932, "loss": 0.0681, "theoretical_loss": 3.3746671277788245, "tokens_seen": 2719219712 }, { "epoch": 0.82, "learning_rate": 0.0001776600866634569, "loss": 0.0678, "theoretical_loss": 3.3746418285572837, "tokens_seen": 2719481856 }, { "epoch": 0.82, "objective/train/advantage_avg": -0.0001307609345531091, "objective/train/docs_used": 988161, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2664278745651245, "objective/train/original_loss": 1.266427755355835, "objective/train/theoretical_loss": 3.3746165324571047, "objective/train/tokens_used": 2740204000, "objective/train/value_avg": -0.00821685791015625, "objective/train/value_loss": 0.00019688457541633397, "objective/train/value_max": -2.9981136322021484e-05, "objective/train/value_min": -0.94091796875, "objective/train/value_reward_corr": 0.8155150163275279, "objective/train/value_std": 0.0184478759765625, "objective/train/weight_avg": 0.9999621510505676, "objective/train/weighted_lm_loss": 1.2667092084884644, "objective/train/weights_max": 1.6394892930984497, "objective/train/weights_min": 0.38045015931129456, "theoretical_loss": 3.3746165324571047, "tokens_seen": 2719744000 }, { "epoch": 0.82, "learning_rate": 0.0001775798427218745, "loss": 0.0647, "theoretical_loss": 3.3746165324571047, "tokens_seen": 2719744000 }, { "epoch": 0.82, "learning_rate": 0.0001774995987802921, "loss": 0.0657, "theoretical_loss": 3.374591239477602, "tokens_seen": 2720006144 }, { "epoch": 0.82, "learning_rate": 0.0001774193548387097, "loss": 0.0698, "theoretical_loss": 3.3745659496180886, "tokens_seen": 2720268288 }, { "epoch": 0.82, "learning_rate": 0.00017733911089712725, "loss": 0.0684, "theoretical_loss": 3.3745406628778802, "tokens_seen": 2720530432 }, { "epoch": 0.82, "learning_rate": 0.00017725886695554486, "loss": 0.0684, "theoretical_loss": 3.374515379256291, "tokens_seen": 2720792576 }, { "epoch": 0.82, "learning_rate": 0.00017717862301396244, "loss": 0.0639, "theoretical_loss": 3.374490098752636, "tokens_seen": 2721054720 }, { "epoch": 0.82, "learning_rate": 0.00017709837907238006, "loss": 0.0657, "theoretical_loss": 3.3744648213662307, "tokens_seen": 2721316864 }, { "epoch": 0.82, "learning_rate": 0.00017701813513079762, "loss": 0.0673, "theoretical_loss": 3.37443954709639, "tokens_seen": 2721579008 }, { "epoch": 0.82, "learning_rate": 0.00017693789118921523, "loss": 0.0639, "theoretical_loss": 3.37441427594243, "tokens_seen": 2721841152 }, { "epoch": 0.82, "learning_rate": 0.00017685764724763281, "loss": 0.0667, "theoretical_loss": 3.3743890079036665, "tokens_seen": 2722103296 }, { "epoch": 0.82, "learning_rate": 0.0001767774033060504, "loss": 0.0619, "theoretical_loss": 3.374363742979415, "tokens_seen": 2722365440 }, { "epoch": 0.83, "learning_rate": 0.00017669715936446798, "loss": 0.0664, "theoretical_loss": 3.3743384811689925, "tokens_seen": 2722627584 }, { "epoch": 0.83, "learning_rate": 0.00017661691542288557, "loss": 0.0641, "theoretical_loss": 3.3743132224717147, "tokens_seen": 2722889728 }, { "epoch": 0.83, "learning_rate": 0.00017653667148130318, "loss": 0.0646, "theoretical_loss": 3.374287966886899, "tokens_seen": 2723151872 }, { "epoch": 0.83, "learning_rate": 0.00017645642753972074, "loss": 0.0635, "theoretical_loss": 3.3742627144138617, "tokens_seen": 2723414016 }, { "epoch": 0.83, "learning_rate": 0.00017637618359813833, "loss": 0.067, "theoretical_loss": 3.3742374650519205, "tokens_seen": 2723676160 }, { "epoch": 0.83, "learning_rate": 0.00017629593965655594, "loss": 0.0651, "theoretical_loss": 3.3742122188003925, "tokens_seen": 2723938304 }, { "epoch": 0.83, "learning_rate": 0.00017621569571497352, "loss": 0.0648, "theoretical_loss": 3.3741869756585947, "tokens_seen": 2724200448 }, { "epoch": 0.83, "learning_rate": 0.0001761354517733911, "loss": 0.0638, "theoretical_loss": 3.374161735625846, "tokens_seen": 2724462592 }, { "epoch": 0.83, "learning_rate": 0.0001760552078318087, "loss": 0.0653, "theoretical_loss": 3.3741364987014633, "tokens_seen": 2724724736 }, { "epoch": 0.83, "learning_rate": 0.0001759749638902263, "loss": 0.0672, "theoretical_loss": 3.3741112648847653, "tokens_seen": 2724986880 }, { "epoch": 0.83, "learning_rate": 0.0001758947199486439, "loss": 0.0636, "theoretical_loss": 3.374086034175071, "tokens_seen": 2725249024 }, { "epoch": 0.83, "learning_rate": 0.00017581447600706145, "loss": 0.0659, "theoretical_loss": 3.3740608065716975, "tokens_seen": 2725511168 }, { "epoch": 0.83, "learning_rate": 0.00017573423206547907, "loss": 0.065, "theoretical_loss": 3.374035582073965, "tokens_seen": 2725773312 }, { "epoch": 0.83, "learning_rate": 0.00017565398812389665, "loss": 0.0638, "theoretical_loss": 3.374010360681192, "tokens_seen": 2726035456 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.00022230023751035333, "objective/train/docs_used": 990556, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2834949493408203, "objective/train/original_loss": 1.2834949493408203, "objective/train/theoretical_loss": 3.373985142392698, "objective/train/tokens_used": 2746757600, "objective/train/value_avg": -0.007114410400390625, "objective/train/value_loss": 0.00023567344760522246, "objective/train/value_max": -2.753734588623047e-05, "objective/train/value_min": -0.27734375, "objective/train/value_reward_corr": 0.7233507761066421, "objective/train/value_std": 0.016265869140625, "objective/train/weight_avg": 1.0003267526626587, "objective/train/weighted_lm_loss": 1.2832162380218506, "objective/train/weights_max": 1.2012343406677246, "objective/train/weights_min": 0.36834847927093506, "theoretical_loss": 3.373985142392698, "tokens_seen": 2726297600 }, { "epoch": 0.83, "learning_rate": 0.00017557374418231424, "loss": 0.065, "theoretical_loss": 3.373985142392698, "tokens_seen": 2726297600 }, { "epoch": 0.83, "learning_rate": 0.00017549350024073182, "loss": 0.0668, "theoretical_loss": 3.3739599272078022, "tokens_seen": 2726559744 }, { "epoch": 0.83, "learning_rate": 0.00017541325629914943, "loss": 0.0653, "theoretical_loss": 3.3739347151258245, "tokens_seen": 2726821888 }, { "epoch": 0.83, "learning_rate": 0.00017533301235756702, "loss": 0.0641, "theoretical_loss": 3.3739095061460853, "tokens_seen": 2727084032 }, { "epoch": 0.83, "learning_rate": 0.00017525276841598458, "loss": 0.0653, "theoretical_loss": 3.3738843002679038, "tokens_seen": 2727346176 }, { "epoch": 0.83, "learning_rate": 0.0001751725244744022, "loss": 0.0671, "theoretical_loss": 3.3738590974906004, "tokens_seen": 2727608320 }, { "epoch": 0.83, "learning_rate": 0.00017509228053281978, "loss": 0.0652, "theoretical_loss": 3.3738338978134967, "tokens_seen": 2727870464 }, { "epoch": 0.83, "learning_rate": 0.0001750120365912374, "loss": 0.0691, "theoretical_loss": 3.3738087012359124, "tokens_seen": 2728132608 }, { "epoch": 0.83, "learning_rate": 0.00017493179264965495, "loss": 0.0657, "theoretical_loss": 3.3737835077571687, "tokens_seen": 2728394752 }, { "epoch": 0.83, "learning_rate": 0.00017485154870807253, "loss": 0.0655, "theoretical_loss": 3.3737583173765873, "tokens_seen": 2728656896 }, { "epoch": 0.83, "learning_rate": 0.00017477130476649015, "loss": 0.066, "theoretical_loss": 3.3737331300934894, "tokens_seen": 2728919040 }, { "epoch": 0.83, "learning_rate": 0.0001746910608249077, "loss": 0.0654, "theoretical_loss": 3.3737079459071966, "tokens_seen": 2729181184 }, { "epoch": 0.83, "learning_rate": 0.00017461081688332532, "loss": 0.0659, "theoretical_loss": 3.37368276481703, "tokens_seen": 2729443328 }, { "epoch": 0.83, "learning_rate": 0.0001745305729417429, "loss": 0.0635, "theoretical_loss": 3.373657586822313, "tokens_seen": 2729705472 }, { "epoch": 0.83, "learning_rate": 0.00017445032900016051, "loss": 0.0679, "theoretical_loss": 3.373632411922367, "tokens_seen": 2729967616 }, { "epoch": 0.83, "learning_rate": 0.00017437008505857807, "loss": 0.0665, "theoretical_loss": 3.373607240116515, "tokens_seen": 2730229760 }, { "epoch": 0.83, "learning_rate": 0.00017428984111699566, "loss": 0.0638, "theoretical_loss": 3.373582071404079, "tokens_seen": 2730491904 }, { "epoch": 0.83, "learning_rate": 0.00017420959717541327, "loss": 0.0693, "theoretical_loss": 3.3735569057843824, "tokens_seen": 2730754048 }, { "epoch": 0.83, "learning_rate": 0.00017412935323383086, "loss": 0.0667, "theoretical_loss": 3.373531743256748, "tokens_seen": 2731016192 }, { "epoch": 0.83, "learning_rate": 0.00017404910929224844, "loss": 0.064, "theoretical_loss": 3.3735065838205, "tokens_seen": 2731278336 }, { "epoch": 0.83, "learning_rate": 0.00017396886535066603, "loss": 0.0663, "theoretical_loss": 3.3734814274749607, "tokens_seen": 2731540480 }, { "epoch": 0.83, "learning_rate": 0.0001738886214090836, "loss": 0.0649, "theoretical_loss": 3.3734562742194547, "tokens_seen": 2731802624 }, { "epoch": 0.83, "learning_rate": 0.0001738083774675012, "loss": 0.0666, "theoretical_loss": 3.373431124053306, "tokens_seen": 2732064768 }, { "epoch": 0.83, "learning_rate": 0.00017372813352591878, "loss": 0.0654, "theoretical_loss": 3.373405976975838, "tokens_seen": 2732326912 }, { "epoch": 0.83, "learning_rate": 0.0001736478895843364, "loss": 0.0672, "theoretical_loss": 3.373380832986376, "tokens_seen": 2732589056 }, { "epoch": 0.83, "objective/train/advantage_avg": -0.00045747478725388646, "objective/train/docs_used": 992760, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4617254734039307, "objective/train/original_loss": 1.4617254734039307, "objective/train/theoretical_loss": 3.3733556920842442, "objective/train/tokens_used": 2753311200, "objective/train/value_avg": -0.0057525634765625, "objective/train/value_loss": 0.00013484605005942285, "objective/train/value_max": -4.363059997558594e-05, "objective/train/value_min": -0.63623046875, "objective/train/value_reward_corr": 0.7118297628842131, "objective/train/value_std": 0.01107025146484375, "objective/train/weight_avg": 0.9996037483215332, "objective/train/weighted_lm_loss": 1.4614465236663818, "objective/train/weights_max": 1.3987005949020386, "objective/train/weights_min": 0.36880823969841003, "theoretical_loss": 3.3733556920842442, "tokens_seen": 2732851200 }, { "epoch": 0.83, "learning_rate": 0.00017356764564275398, "loss": 0.0681, "theoretical_loss": 3.3733556920842442, "tokens_seen": 2732851200 }, { "epoch": 0.83, "learning_rate": 0.00017348740170117157, "loss": 0.0658, "theoretical_loss": 3.373330554268768, "tokens_seen": 2733113344 }, { "epoch": 0.83, "learning_rate": 0.00017340715775958915, "loss": 0.0627, "theoretical_loss": 3.373305419539271, "tokens_seen": 2733375488 }, { "epoch": 0.83, "learning_rate": 0.00017332691381800674, "loss": 0.0677, "theoretical_loss": 3.37328028789508, "tokens_seen": 2733637632 }, { "epoch": 0.83, "learning_rate": 0.00017324666987642435, "loss": 0.0646, "theoretical_loss": 3.37325515933552, "tokens_seen": 2733899776 }, { "epoch": 0.83, "learning_rate": 0.0001731664259348419, "loss": 0.0662, "theoretical_loss": 3.3732300338599157, "tokens_seen": 2734161920 }, { "epoch": 0.83, "learning_rate": 0.00017308618199325952, "loss": 0.0639, "theoretical_loss": 3.3732049114675946, "tokens_seen": 2734424064 }, { "epoch": 0.83, "learning_rate": 0.0001730059380516771, "loss": 0.066, "theoretical_loss": 3.373179792157882, "tokens_seen": 2734686208 }, { "epoch": 0.83, "learning_rate": 0.00017292569411009467, "loss": 0.0655, "theoretical_loss": 3.3731546759301034, "tokens_seen": 2734948352 }, { "epoch": 0.83, "learning_rate": 0.00017284545016851228, "loss": 0.0627, "theoretical_loss": 3.373129562783587, "tokens_seen": 2735210496 }, { "epoch": 0.83, "learning_rate": 0.00017276520622692986, "loss": 0.0661, "theoretical_loss": 3.373104452717658, "tokens_seen": 2735472640 }, { "epoch": 0.83, "learning_rate": 0.00017268496228534748, "loss": 0.0642, "theoretical_loss": 3.3730793457316444, "tokens_seen": 2735734784 }, { "epoch": 0.83, "learning_rate": 0.00017260471834376504, "loss": 0.0666, "theoretical_loss": 3.3730542418248732, "tokens_seen": 2735996928 }, { "epoch": 0.83, "learning_rate": 0.00017252447440218265, "loss": 0.0647, "theoretical_loss": 3.373029140996671, "tokens_seen": 2736259072 }, { "epoch": 0.83, "learning_rate": 0.00017244423046060023, "loss": 0.0624, "theoretical_loss": 3.3730040432463664, "tokens_seen": 2736521216 }, { "epoch": 0.83, "learning_rate": 0.00017236398651901782, "loss": 0.0651, "theoretical_loss": 3.372978948573287, "tokens_seen": 2736783360 }, { "epoch": 0.83, "learning_rate": 0.0001722837425774354, "loss": 0.0633, "theoretical_loss": 3.3729538569767596, "tokens_seen": 2737045504 }, { "epoch": 0.83, "learning_rate": 0.000172203498635853, "loss": 0.0642, "theoretical_loss": 3.3729287684561142, "tokens_seen": 2737307648 }, { "epoch": 0.83, "learning_rate": 0.0001721232546942706, "loss": 0.0644, "theoretical_loss": 3.3729036830106778, "tokens_seen": 2737569792 }, { "epoch": 0.83, "learning_rate": 0.0001720430107526882, "loss": 0.0681, "theoretical_loss": 3.3728786006397797, "tokens_seen": 2737831936 }, { "epoch": 0.83, "learning_rate": 0.00017196276681110575, "loss": 0.0658, "theoretical_loss": 3.372853521342749, "tokens_seen": 2738094080 }, { "epoch": 0.83, "learning_rate": 0.00017188252286952336, "loss": 0.0648, "theoretical_loss": 3.3728284451189143, "tokens_seen": 2738356224 }, { "epoch": 0.83, "learning_rate": 0.00017180227892794094, "loss": 0.0627, "theoretical_loss": 3.3728033719676045, "tokens_seen": 2738618368 }, { "epoch": 0.83, "learning_rate": 0.00017172203498635853, "loss": 0.064, "theoretical_loss": 3.3727783018881503, "tokens_seen": 2738880512 }, { "epoch": 0.83, "learning_rate": 0.00017164179104477612, "loss": 0.0669, "theoretical_loss": 3.3727532348798803, "tokens_seen": 2739142656 }, { "epoch": 0.83, "objective/train/advantage_avg": -0.00029088734299875796, "objective/train/docs_used": 995217, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3371988534927368, "objective/train/original_loss": 1.3371988534927368, "objective/train/theoretical_loss": 3.372728170942125, "objective/train/tokens_used": 2759864800, "objective/train/value_avg": -0.005031585693359375, "objective/train/value_loss": 0.00014523128629662097, "objective/train/value_max": -2.6464462280273438e-05, "objective/train/value_min": -0.346923828125, "objective/train/value_reward_corr": 0.6469031074700431, "objective/train/value_std": 0.00977325439453125, "objective/train/weight_avg": 0.9997756481170654, "objective/train/weighted_lm_loss": 1.3366706371307373, "objective/train/weights_max": 1.358128309249878, "objective/train/weights_min": 0.3687118589878082, "theoretical_loss": 3.372728170942125, "tokens_seen": 2739404800 }, { "epoch": 0.83, "learning_rate": 0.00017156154710319373, "loss": 0.0647, "theoretical_loss": 3.372728170942125, "tokens_seen": 2739404800 }, { "epoch": 0.83, "learning_rate": 0.00017148130316161131, "loss": 0.0678, "theoretical_loss": 3.3727031100742133, "tokens_seen": 2739666944 }, { "epoch": 0.83, "learning_rate": 0.00017140105922002887, "loss": 0.062, "theoretical_loss": 3.372678052275478, "tokens_seen": 2739929088 }, { "epoch": 0.83, "learning_rate": 0.00017132081527844648, "loss": 0.0626, "theoretical_loss": 3.372652997545247, "tokens_seen": 2740191232 }, { "epoch": 0.83, "learning_rate": 0.00017124057133686407, "loss": 0.0647, "theoretical_loss": 3.3726279458828525, "tokens_seen": 2740453376 }, { "epoch": 0.83, "learning_rate": 0.00017116032739528168, "loss": 0.0664, "theoretical_loss": 3.3726028972876247, "tokens_seen": 2740715520 }, { "epoch": 0.83, "learning_rate": 0.00017108008345369924, "loss": 0.0648, "theoretical_loss": 3.3725778517588956, "tokens_seen": 2740977664 }, { "epoch": 0.83, "learning_rate": 0.00017099983951211683, "loss": 0.0634, "theoretical_loss": 3.372552809295996, "tokens_seen": 2741239808 }, { "epoch": 0.83, "learning_rate": 0.00017091959557053444, "loss": 0.0655, "theoretical_loss": 3.372527769898258, "tokens_seen": 2741501952 }, { "epoch": 0.83, "learning_rate": 0.000170839351628952, "loss": 0.0656, "theoretical_loss": 3.372502733565012, "tokens_seen": 2741764096 }, { "epoch": 0.83, "learning_rate": 0.0001707591076873696, "loss": 0.0649, "theoretical_loss": 3.372477700295592, "tokens_seen": 2742026240 }, { "epoch": 0.83, "learning_rate": 0.0001706788637457872, "loss": 0.0644, "theoretical_loss": 3.372452670089329, "tokens_seen": 2742288384 }, { "epoch": 0.83, "learning_rate": 0.0001705986198042048, "loss": 0.0619, "theoretical_loss": 3.3724276429455555, "tokens_seen": 2742550528 }, { "epoch": 0.83, "learning_rate": 0.00017051837586262237, "loss": 0.0649, "theoretical_loss": 3.3724026188636045, "tokens_seen": 2742812672 }, { "epoch": 0.83, "learning_rate": 0.00017043813192103995, "loss": 0.0657, "theoretical_loss": 3.3723775978428083, "tokens_seen": 2743074816 }, { "epoch": 0.83, "learning_rate": 0.00017035788797945757, "loss": 0.0659, "theoretical_loss": 3.3723525798825005, "tokens_seen": 2743336960 }, { "epoch": 0.83, "learning_rate": 0.00017027764403787515, "loss": 0.0642, "theoretical_loss": 3.372327564982014, "tokens_seen": 2743599104 }, { "epoch": 0.83, "learning_rate": 0.00017019740009629274, "loss": 0.0628, "theoretical_loss": 3.372302553140682, "tokens_seen": 2743861248 }, { "epoch": 0.83, "learning_rate": 0.00017011715615471032, "loss": 0.0637, "theoretical_loss": 3.3722775443578388, "tokens_seen": 2744123392 }, { "epoch": 0.83, "learning_rate": 0.0001700369122131279, "loss": 0.0642, "theoretical_loss": 3.372252538632818, "tokens_seen": 2744385536 }, { "epoch": 0.83, "learning_rate": 0.0001699566682715455, "loss": 0.0653, "theoretical_loss": 3.372227535964954, "tokens_seen": 2744647680 }, { "epoch": 0.83, "learning_rate": 0.00016987642432996308, "loss": 0.0658, "theoretical_loss": 3.372202536353581, "tokens_seen": 2744909824 }, { "epoch": 0.83, "learning_rate": 0.0001697961803883807, "loss": 0.0606, "theoretical_loss": 3.3721775397980323, "tokens_seen": 2745171968 }, { "epoch": 0.83, "learning_rate": 0.00016971593644679828, "loss": 0.064, "theoretical_loss": 3.3721525462976443, "tokens_seen": 2745434112 }, { "epoch": 0.83, "learning_rate": 0.00016963569250521586, "loss": 0.0655, "theoretical_loss": 3.372127555851751, "tokens_seen": 2745696256 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.00043947980157099664, "objective/train/docs_used": 997819, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2819221019744873, "objective/train/original_loss": 1.2819222211837769, "objective/train/theoretical_loss": 3.372102568459688, "objective/train/tokens_used": 2766418400, "objective/train/value_avg": -0.0078277587890625, "objective/train/value_loss": 0.00024490803480148315, "objective/train/value_max": -2.2292137145996094e-05, "objective/train/value_min": -0.515625, "objective/train/value_reward_corr": 0.666362487041277, "objective/train/value_std": 0.01556396484375, "objective/train/weight_avg": 1.0005496740341187, "objective/train/weighted_lm_loss": 1.2824876308441162, "objective/train/weights_max": 1.6491175889968872, "objective/train/weights_min": 0.37588581442832947, "theoretical_loss": 3.372102568459688, "tokens_seen": 2745958400 }, { "epoch": 0.83, "learning_rate": 0.00016955544856363345, "loss": 0.0635, "theoretical_loss": 3.372102568459688, "tokens_seen": 2745958400 }, { "epoch": 0.83, "learning_rate": 0.00016947520462205103, "loss": 0.0661, "theoretical_loss": 3.372077584120791, "tokens_seen": 2746220544 }, { "epoch": 0.83, "learning_rate": 0.00016939496068046865, "loss": 0.0622, "theoretical_loss": 3.372052602834394, "tokens_seen": 2746482688 }, { "epoch": 0.83, "learning_rate": 0.0001693147167388862, "loss": 0.0665, "theoretical_loss": 3.372027624599834, "tokens_seen": 2746744832 }, { "epoch": 0.83, "learning_rate": 0.00016923447279730382, "loss": 0.0624, "theoretical_loss": 3.372002649416447, "tokens_seen": 2747006976 }, { "epoch": 0.83, "learning_rate": 0.0001691542288557214, "loss": 0.0629, "theoretical_loss": 3.3719776772835686, "tokens_seen": 2747269120 }, { "epoch": 0.83, "learning_rate": 0.00016907398491413896, "loss": 0.0645, "theoretical_loss": 3.3719527082005354, "tokens_seen": 2747531264 }, { "epoch": 0.83, "learning_rate": 0.00016899374097255657, "loss": 0.0656, "theoretical_loss": 3.371927742166684, "tokens_seen": 2747793408 }, { "epoch": 0.83, "learning_rate": 0.00016891349703097416, "loss": 0.0631, "theoretical_loss": 3.371902779181352, "tokens_seen": 2748055552 }, { "epoch": 0.83, "learning_rate": 0.00016883325308939177, "loss": 0.0662, "theoretical_loss": 3.3718778192438745, "tokens_seen": 2748317696 }, { "epoch": 0.83, "learning_rate": 0.00016875300914780933, "loss": 0.0646, "theoretical_loss": 3.371852862353591, "tokens_seen": 2748579840 }, { "epoch": 0.83, "learning_rate": 0.00016867276520622694, "loss": 0.0653, "theoretical_loss": 3.371827908509837, "tokens_seen": 2748841984 }, { "epoch": 0.83, "learning_rate": 0.00016859252126464453, "loss": 0.0649, "theoretical_loss": 3.371802957711951, "tokens_seen": 2749104128 }, { "epoch": 0.83, "learning_rate": 0.0001685122773230621, "loss": 0.0656, "theoretical_loss": 3.3717780099592707, "tokens_seen": 2749366272 }, { "epoch": 0.83, "learning_rate": 0.0001684320333814797, "loss": 0.0661, "theoretical_loss": 3.371753065251134, "tokens_seen": 2749628416 }, { "epoch": 0.83, "learning_rate": 0.00016835178943989728, "loss": 0.0628, "theoretical_loss": 3.3717281235868795, "tokens_seen": 2749890560 }, { "epoch": 0.83, "learning_rate": 0.0001682715454983149, "loss": 0.0633, "theoretical_loss": 3.3717031849658454, "tokens_seen": 2750152704 }, { "epoch": 0.83, "learning_rate": 0.00016819130155673246, "loss": 0.0648, "theoretical_loss": 3.3716782493873705, "tokens_seen": 2750414848 }, { "epoch": 0.83, "learning_rate": 0.00016811105761515004, "loss": 0.0649, "theoretical_loss": 3.371653316850794, "tokens_seen": 2750676992 }, { "epoch": 0.83, "learning_rate": 0.00016803081367356765, "loss": 0.066, "theoretical_loss": 3.371628387355454, "tokens_seen": 2750939136 }, { "epoch": 0.83, "learning_rate": 0.00016795056973198524, "loss": 0.0624, "theoretical_loss": 3.3716034609006904, "tokens_seen": 2751201280 }, { "epoch": 0.83, "learning_rate": 0.00016787032579040282, "loss": 0.0661, "theoretical_loss": 3.3715785374858425, "tokens_seen": 2751463424 }, { "epoch": 0.83, "learning_rate": 0.0001677900818488204, "loss": 0.0626, "theoretical_loss": 3.3715536171102505, "tokens_seen": 2751725568 }, { "epoch": 0.83, "learning_rate": 0.00016770983790723802, "loss": 0.0624, "theoretical_loss": 3.3715286997732536, "tokens_seen": 2751987712 }, { "epoch": 0.83, "learning_rate": 0.0001676295939656556, "loss": 0.0638, "theoretical_loss": 3.3715037854741925, "tokens_seen": 2752249856 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.0006543775671161711, "objective/train/docs_used": 1000102, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3108874559402466, "objective/train/original_loss": 1.3108872175216675, "objective/train/theoretical_loss": 3.371478874212407, "objective/train/tokens_used": 2772972000, "objective/train/value_avg": -0.00687408447265625, "objective/train/value_loss": 0.00016098450578283519, "objective/train/value_max": -3.349781036376953e-05, "objective/train/value_min": -0.63232421875, "objective/train/value_reward_corr": 0.7514518038976055, "objective/train/value_std": 0.0152587890625, "objective/train/weight_avg": 1.0007280111312866, "objective/train/weighted_lm_loss": 1.3118845224380493, "objective/train/weights_max": 1.4685527086257935, "objective/train/weights_min": 0.37259092926979065, "theoretical_loss": 3.371478874212407, "tokens_seen": 2752512000 }, { "epoch": 0.83, "learning_rate": 0.00016754935002407317, "loss": 0.0649, "theoretical_loss": 3.371478874212407, "tokens_seen": 2752512000 }, { "epoch": 0.83, "learning_rate": 0.00016746910608249078, "loss": 0.0645, "theoretical_loss": 3.371453965987238, "tokens_seen": 2752774144 }, { "epoch": 0.83, "learning_rate": 0.00016738886214090836, "loss": 0.0645, "theoretical_loss": 3.371429060798026, "tokens_seen": 2753036288 }, { "epoch": 0.83, "learning_rate": 0.00016730861819932598, "loss": 0.0676, "theoretical_loss": 3.371404158644112, "tokens_seen": 2753298432 }, { "epoch": 0.83, "learning_rate": 0.00016722837425774354, "loss": 0.0648, "theoretical_loss": 3.3713792595248373, "tokens_seen": 2753560576 }, { "epoch": 0.83, "learning_rate": 0.00016714813031616112, "loss": 0.0639, "theoretical_loss": 3.3713543634395426, "tokens_seen": 2753822720 }, { "epoch": 0.83, "learning_rate": 0.00016706788637457873, "loss": 0.0638, "theoretical_loss": 3.3713294703875705, "tokens_seen": 2754084864 }, { "epoch": 0.83, "learning_rate": 0.0001669876424329963, "loss": 0.064, "theoretical_loss": 3.371304580368262, "tokens_seen": 2754347008 }, { "epoch": 0.83, "learning_rate": 0.0001669073984914139, "loss": 0.0634, "theoretical_loss": 3.3712796933809592, "tokens_seen": 2754609152 }, { "epoch": 0.83, "learning_rate": 0.0001668271545498315, "loss": 0.0644, "theoretical_loss": 3.3712548094250043, "tokens_seen": 2754871296 }, { "epoch": 0.83, "learning_rate": 0.0001667469106082491, "loss": 0.0638, "theoretical_loss": 3.37122992849974, "tokens_seen": 2755133440 }, { "epoch": 0.84, "learning_rate": 0.00016666666666666666, "loss": 0.0677, "theoretical_loss": 3.371205050604508, "tokens_seen": 2755395584 }, { "epoch": 0.84, "learning_rate": 0.00016658642272508425, "loss": 0.0654, "theoretical_loss": 3.371180175738652, "tokens_seen": 2755657728 }, { "epoch": 0.84, "learning_rate": 0.00016650617878350186, "loss": 0.067, "theoretical_loss": 3.371155303901514, "tokens_seen": 2755919872 }, { "epoch": 0.84, "learning_rate": 0.00016642593484191944, "loss": 0.0653, "theoretical_loss": 3.3711304350924385, "tokens_seen": 2756182016 }, { "epoch": 0.84, "learning_rate": 0.00016634569090033703, "loss": 0.0641, "theoretical_loss": 3.371105569310768, "tokens_seen": 2756444160 }, { "epoch": 0.84, "learning_rate": 0.00016626544695875462, "loss": 0.0642, "theoretical_loss": 3.371080706555846, "tokens_seen": 2756706304 }, { "epoch": 0.84, "learning_rate": 0.00016618520301717223, "loss": 0.0637, "theoretical_loss": 3.3710558468270166, "tokens_seen": 2756968448 }, { "epoch": 0.84, "learning_rate": 0.0001661049590755898, "loss": 0.0682, "theoretical_loss": 3.371030990123624, "tokens_seen": 2757230592 }, { "epoch": 0.84, "learning_rate": 0.00016602471513400737, "loss": 0.0663, "theoretical_loss": 3.371006136445012, "tokens_seen": 2757492736 }, { "epoch": 0.84, "learning_rate": 0.00016594447119242498, "loss": 0.0663, "theoretical_loss": 3.3709812857905255, "tokens_seen": 2757754880 }, { "epoch": 0.84, "learning_rate": 0.00016586422725084257, "loss": 0.0679, "theoretical_loss": 3.3709564381595087, "tokens_seen": 2758017024 }, { "epoch": 0.84, "learning_rate": 0.00016578398330926016, "loss": 0.0654, "theoretical_loss": 3.370931593551307, "tokens_seen": 2758279168 }, { "epoch": 0.84, "learning_rate": 0.00016570373936767774, "loss": 0.0653, "theoretical_loss": 3.3709067519652645, "tokens_seen": 2758541312 }, { "epoch": 0.84, "learning_rate": 0.00016562349542609533, "loss": 0.0662, "theoretical_loss": 3.370881913400727, "tokens_seen": 2758803456 }, { "epoch": 0.84, "objective/train/advantage_avg": -0.0003122219059150666, "objective/train/docs_used": 1002432, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3762762546539307, "objective/train/original_loss": 1.3762761354446411, "objective/train/theoretical_loss": 3.37085707785704, "objective/train/tokens_used": 2779525600, "objective/train/value_avg": -0.007312774658203125, "objective/train/value_loss": 0.0001845854421844706, "objective/train/value_max": -1.895427703857422e-05, "objective/train/value_min": -0.83056640625, "objective/train/value_reward_corr": 0.757421269593654, "objective/train/value_std": 0.014129638671875, "objective/train/weight_avg": 0.9997718334197998, "objective/train/weighted_lm_loss": 1.3756195306777954, "objective/train/weights_max": 1.1826108694076538, "objective/train/weights_min": 0.38129866123199463, "theoretical_loss": 3.37085707785704, "tokens_seen": 2759065600 }, { "epoch": 0.84, "learning_rate": 0.00016554325148451294, "loss": 0.0672, "theoretical_loss": 3.37085707785704, "tokens_seen": 2759065600 }, { "epoch": 0.84, "learning_rate": 0.0001654630075429305, "loss": 0.0641, "theoretical_loss": 3.3708322453335486, "tokens_seen": 2759327744 }, { "epoch": 0.84, "learning_rate": 0.0001653827636013481, "loss": 0.0652, "theoretical_loss": 3.3708074158295993, "tokens_seen": 2759589888 }, { "epoch": 0.84, "learning_rate": 0.0001653025196597657, "loss": 0.0614, "theoretical_loss": 3.3707825893445382, "tokens_seen": 2759852032 }, { "epoch": 0.84, "learning_rate": 0.00016522227571818328, "loss": 0.0662, "theoretical_loss": 3.3707577658777113, "tokens_seen": 2760114176 }, { "epoch": 0.84, "learning_rate": 0.00016514203177660087, "loss": 0.0641, "theoretical_loss": 3.3707329454284647, "tokens_seen": 2760376320 }, { "epoch": 0.84, "learning_rate": 0.00016506178783501845, "loss": 0.0675, "theoretical_loss": 3.3707081279961453, "tokens_seen": 2760638464 }, { "epoch": 0.84, "learning_rate": 0.00016498154389343607, "loss": 0.0659, "theoretical_loss": 3.3706833135801, "tokens_seen": 2760900608 }, { "epoch": 0.84, "learning_rate": 0.00016490129995185362, "loss": 0.0636, "theoretical_loss": 3.370658502179676, "tokens_seen": 2761162752 }, { "epoch": 0.84, "learning_rate": 0.00016482105601027124, "loss": 0.066, "theoretical_loss": 3.370633693794221, "tokens_seen": 2761424896 }, { "epoch": 0.84, "learning_rate": 0.00016474081206868882, "loss": 0.0675, "theoretical_loss": 3.3706088884230816, "tokens_seen": 2761687040 }, { "epoch": 0.84, "learning_rate": 0.0001646605681271064, "loss": 0.0635, "theoretical_loss": 3.3705840860656053, "tokens_seen": 2761949184 }, { "epoch": 0.84, "learning_rate": 0.000164580324185524, "loss": 0.0622, "theoretical_loss": 3.370559286721141, "tokens_seen": 2762211328 }, { "epoch": 0.84, "learning_rate": 0.00016450008024394158, "loss": 0.0646, "theoretical_loss": 3.3705344903890366, "tokens_seen": 2762473472 }, { "epoch": 0.84, "learning_rate": 0.0001644198363023592, "loss": 0.0663, "theoretical_loss": 3.3705096970686395, "tokens_seen": 2762735616 }, { "epoch": 0.84, "learning_rate": 0.00016433959236077675, "loss": 0.0674, "theoretical_loss": 3.370484906759299, "tokens_seen": 2762997760 }, { "epoch": 0.84, "learning_rate": 0.00016425934841919436, "loss": 0.0612, "theoretical_loss": 3.370460119460364, "tokens_seen": 2763259904 }, { "epoch": 0.84, "learning_rate": 0.00016417910447761195, "loss": 0.0639, "theoretical_loss": 3.3704353351711824, "tokens_seen": 2763522048 }, { "epoch": 0.84, "learning_rate": 0.00016409886053602953, "loss": 0.0645, "theoretical_loss": 3.3704105538911038, "tokens_seen": 2763784192 }, { "epoch": 0.84, "learning_rate": 0.00016401861659444712, "loss": 0.0657, "theoretical_loss": 3.3703857756194777, "tokens_seen": 2764046336 }, { "epoch": 0.84, "learning_rate": 0.0001639383726528647, "loss": 0.0663, "theoretical_loss": 3.3703610003556537, "tokens_seen": 2764308480 }, { "epoch": 0.84, "learning_rate": 0.00016385812871128232, "loss": 0.0659, "theoretical_loss": 3.370336228098981, "tokens_seen": 2764570624 }, { "epoch": 0.84, "learning_rate": 0.0001637778847696999, "loss": 0.064, "theoretical_loss": 3.3703114588488097, "tokens_seen": 2764832768 }, { "epoch": 0.84, "learning_rate": 0.00016369764082811746, "loss": 0.0611, "theoretical_loss": 3.3702866926044903, "tokens_seen": 2765094912 }, { "epoch": 0.84, "learning_rate": 0.00016361739688653507, "loss": 0.0647, "theoretical_loss": 3.3702619293653724, "tokens_seen": 2765357056 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.00030220934422686696, "objective/train/docs_used": 1004597, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.231160283088684, "objective/train/original_loss": 1.231160044670105, "objective/train/theoretical_loss": 3.370237169130807, "objective/train/tokens_used": 2786079200, "objective/train/value_avg": -0.006072998046875, "objective/train/value_loss": 0.00020748592214658856, "objective/train/value_max": -2.5093555450439453e-05, "objective/train/value_min": -0.3544921875, "objective/train/value_reward_corr": 0.6472541412830914, "objective/train/value_std": 0.0118408203125, "objective/train/weight_avg": 1.0003962516784668, "objective/train/weighted_lm_loss": 1.23166823387146, "objective/train/weights_max": 1.314475178718567, "objective/train/weights_min": 0.3690946400165558, "theoretical_loss": 3.370237169130807, "tokens_seen": 2765619200 }, { "epoch": 0.84, "learning_rate": 0.00016353715294495266, "loss": 0.0636, "theoretical_loss": 3.370237169130807, "tokens_seen": 2765619200 }, { "epoch": 0.84, "learning_rate": 0.00016345690900337024, "loss": 0.0639, "theoretical_loss": 3.3702124119001446, "tokens_seen": 2765881344 }, { "epoch": 0.84, "learning_rate": 0.00016337666506178783, "loss": 0.0659, "theoretical_loss": 3.3701876576727368, "tokens_seen": 2766143488 }, { "epoch": 0.84, "learning_rate": 0.00016329642112020544, "loss": 0.0657, "theoretical_loss": 3.3701629064479333, "tokens_seen": 2766405632 }, { "epoch": 0.84, "learning_rate": 0.00016321617717862303, "loss": 0.0662, "theoretical_loss": 3.370138158225087, "tokens_seen": 2766667776 }, { "epoch": 0.84, "learning_rate": 0.00016313593323704059, "loss": 0.0639, "theoretical_loss": 3.370113413003548, "tokens_seen": 2766929920 }, { "epoch": 0.84, "learning_rate": 0.0001630556892954582, "loss": 0.0641, "theoretical_loss": 3.3700886707826694, "tokens_seen": 2767192064 }, { "epoch": 0.84, "learning_rate": 0.00016297544535387578, "loss": 0.0652, "theoretical_loss": 3.370063931561802, "tokens_seen": 2767454208 }, { "epoch": 0.84, "learning_rate": 0.0001628952014122934, "loss": 0.0627, "theoretical_loss": 3.3700391953402984, "tokens_seen": 2767716352 }, { "epoch": 0.84, "learning_rate": 0.00016281495747071096, "loss": 0.0661, "theoretical_loss": 3.3700144621175108, "tokens_seen": 2767978496 }, { "epoch": 0.84, "learning_rate": 0.00016273471352912854, "loss": 0.0638, "theoretical_loss": 3.369989731892792, "tokens_seen": 2768240640 }, { "epoch": 0.84, "learning_rate": 0.00016265446958754615, "loss": 0.0635, "theoretical_loss": 3.3699650046654948, "tokens_seen": 2768502784 }, { "epoch": 0.84, "learning_rate": 0.00016257422564596374, "loss": 0.0642, "theoretical_loss": 3.3699402804349714, "tokens_seen": 2768764928 }, { "epoch": 0.84, "learning_rate": 0.00016249398170438132, "loss": 0.0679, "theoretical_loss": 3.3699155592005754, "tokens_seen": 2769027072 }, { "epoch": 0.84, "learning_rate": 0.0001624137377627989, "loss": 0.066, "theoretical_loss": 3.36989084096166, "tokens_seen": 2769289216 }, { "epoch": 0.84, "learning_rate": 0.00016233349382121652, "loss": 0.068, "theoretical_loss": 3.3698661257175795, "tokens_seen": 2769551360 }, { "epoch": 0.84, "learning_rate": 0.00016225324987963408, "loss": 0.0671, "theoretical_loss": 3.369841413467687, "tokens_seen": 2769813504 }, { "epoch": 0.84, "learning_rate": 0.00016217300593805167, "loss": 0.0644, "theoretical_loss": 3.3698167042113356, "tokens_seen": 2770075648 }, { "epoch": 0.84, "learning_rate": 0.00016209276199646928, "loss": 0.0664, "theoretical_loss": 3.369791997947881, "tokens_seen": 2770337792 }, { "epoch": 0.84, "learning_rate": 0.00016201251805488686, "loss": 0.0651, "theoretical_loss": 3.3697672946766764, "tokens_seen": 2770599936 }, { "epoch": 0.84, "learning_rate": 0.00016193227411330445, "loss": 0.0655, "theoretical_loss": 3.369742594397077, "tokens_seen": 2770862080 }, { "epoch": 0.84, "learning_rate": 0.00016185203017172204, "loss": 0.0631, "theoretical_loss": 3.369717897108437, "tokens_seen": 2771124224 }, { "epoch": 0.84, "learning_rate": 0.00016177178623013962, "loss": 0.0646, "theoretical_loss": 3.3696932028101116, "tokens_seen": 2771386368 }, { "epoch": 0.84, "learning_rate": 0.00016169154228855723, "loss": 0.0656, "theoretical_loss": 3.369668511501456, "tokens_seen": 2771648512 }, { "epoch": 0.84, "learning_rate": 0.0001616112983469748, "loss": 0.0642, "theoretical_loss": 3.3696438231818258, "tokens_seen": 2771910656 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.0006500775343738496, "objective/train/docs_used": 1006627, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2970798015594482, "objective/train/original_loss": 1.2970798015594482, "objective/train/theoretical_loss": 3.369619137850576, "objective/train/tokens_used": 2792632800, "objective/train/value_avg": -0.00782012939453125, "objective/train/value_loss": 0.0002618881990201771, "objective/train/value_max": -1.8656253814697266e-05, "objective/train/value_min": -0.422607421875, "objective/train/value_reward_corr": 0.6451092115126782, "objective/train/value_std": 0.01337432861328125, "objective/train/weight_avg": 1.0007460117340088, "objective/train/weighted_lm_loss": 1.2972346544265747, "objective/train/weights_max": 1.3180100917816162, "objective/train/weights_min": 0.05597710609436035, "theoretical_loss": 3.369619137850576, "tokens_seen": 2772172800 }, { "epoch": 0.84, "learning_rate": 0.0001615310544053924, "loss": 0.0682, "theoretical_loss": 3.369619137850576, "tokens_seen": 2772172800 }, { "epoch": 0.84, "learning_rate": 0.00016145081046381, "loss": 0.0639, "theoretical_loss": 3.3695944555070625, "tokens_seen": 2772434944 }, { "epoch": 0.84, "learning_rate": 0.00016137056652222758, "loss": 0.062, "theoretical_loss": 3.3695697761506413, "tokens_seen": 2772697088 }, { "epoch": 0.84, "learning_rate": 0.00016129032258064516, "loss": 0.0647, "theoretical_loss": 3.3695450997806686, "tokens_seen": 2772959232 }, { "epoch": 0.84, "learning_rate": 0.00016121007863906275, "loss": 0.0653, "theoretical_loss": 3.369520426396501, "tokens_seen": 2773221376 }, { "epoch": 0.84, "learning_rate": 0.00016112983469748036, "loss": 0.0659, "theoretical_loss": 3.3694957559974936, "tokens_seen": 2773483520 }, { "epoch": 0.84, "learning_rate": 0.00016104959075589792, "loss": 0.063, "theoretical_loss": 3.3694710885830053, "tokens_seen": 2773745664 }, { "epoch": 0.84, "learning_rate": 0.00016096934681431553, "loss": 0.0674, "theoretical_loss": 3.3694464241523914, "tokens_seen": 2774007808 }, { "epoch": 0.84, "learning_rate": 0.00016088910287273312, "loss": 0.0647, "theoretical_loss": 3.36942176270501, "tokens_seen": 2774269952 }, { "epoch": 0.84, "learning_rate": 0.0001608088589311507, "loss": 0.0626, "theoretical_loss": 3.3693971042402175, "tokens_seen": 2774532096 }, { "epoch": 0.84, "learning_rate": 0.0001607286149895683, "loss": 0.0624, "theoretical_loss": 3.369372448757372, "tokens_seen": 2774794240 }, { "epoch": 0.84, "learning_rate": 0.00016064837104798587, "loss": 0.0637, "theoretical_loss": 3.3693477962558314, "tokens_seen": 2775056384 }, { "epoch": 0.84, "learning_rate": 0.00016056812710640348, "loss": 0.0636, "theoretical_loss": 3.3693231467349536, "tokens_seen": 2775318528 }, { "epoch": 0.84, "learning_rate": 0.00016048788316482104, "loss": 0.066, "theoretical_loss": 3.3692985001940965, "tokens_seen": 2775580672 }, { "epoch": 0.84, "learning_rate": 0.00016040763922323866, "loss": 0.0647, "theoretical_loss": 3.369273856632618, "tokens_seen": 2775842816 }, { "epoch": 0.84, "learning_rate": 0.00016032739528165624, "loss": 0.0653, "theoretical_loss": 3.3692492160498775, "tokens_seen": 2776104960 }, { "epoch": 0.84, "learning_rate": 0.00016024715134007383, "loss": 0.0641, "theoretical_loss": 3.369224578445233, "tokens_seen": 2776367104 }, { "epoch": 0.84, "learning_rate": 0.0001601669073984914, "loss": 0.0634, "theoretical_loss": 3.369199943818044, "tokens_seen": 2776629248 }, { "epoch": 0.84, "learning_rate": 0.000160086663456909, "loss": 0.064, "theoretical_loss": 3.3691753121676693, "tokens_seen": 2776891392 }, { "epoch": 0.84, "learning_rate": 0.0001600064195153266, "loss": 0.0676, "theoretical_loss": 3.3691506834934684, "tokens_seen": 2777153536 }, { "epoch": 0.84, "learning_rate": 0.0001599261755737442, "loss": 0.0652, "theoretical_loss": 3.3691260577948006, "tokens_seen": 2777415680 }, { "epoch": 0.84, "learning_rate": 0.00015984593163216175, "loss": 0.0639, "theoretical_loss": 3.3691014350710256, "tokens_seen": 2777677824 }, { "epoch": 0.84, "learning_rate": 0.00015976568769057937, "loss": 0.0638, "theoretical_loss": 3.3690768153215034, "tokens_seen": 2777939968 }, { "epoch": 0.84, "learning_rate": 0.00015968544374899695, "loss": 0.0645, "theoretical_loss": 3.3690521985455946, "tokens_seen": 2778202112 }, { "epoch": 0.84, "learning_rate": 0.00015960519980741454, "loss": 0.064, "theoretical_loss": 3.3690275847426587, "tokens_seen": 2778464256 }, { "epoch": 0.84, "objective/train/advantage_avg": -0.0006413300288841128, "objective/train/docs_used": 1009547, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2469478845596313, "objective/train/original_loss": 1.246948003768921, "objective/train/theoretical_loss": 3.3690029739120564, "objective/train/tokens_used": 2799186400, "objective/train/value_avg": -0.00994110107421875, "objective/train/value_loss": 0.00023580322158522904, "objective/train/value_max": -3.707408905029297e-05, "objective/train/value_min": -0.44580078125, "objective/train/value_reward_corr": 0.7511882765787193, "objective/train/value_std": 0.0173492431640625, "objective/train/weight_avg": 0.9994668960571289, "objective/train/weighted_lm_loss": 1.2461657524108887, "objective/train/weights_max": 1.2655079364776611, "objective/train/weights_min": 0.3680661618709564, "theoretical_loss": 3.3690029739120564, "tokens_seen": 2778726400 }, { "epoch": 0.84, "learning_rate": 0.00015952495586583212, "loss": 0.0672, "theoretical_loss": 3.3690029739120564, "tokens_seen": 2778726400 }, { "epoch": 0.84, "learning_rate": 0.00015944471192424974, "loss": 0.0663, "theoretical_loss": 3.368978366053149, "tokens_seen": 2778988544 }, { "epoch": 0.84, "learning_rate": 0.00015936446798266732, "loss": 0.0611, "theoretical_loss": 3.3689537611652964, "tokens_seen": 2779250688 }, { "epoch": 0.84, "learning_rate": 0.00015928422404108488, "loss": 0.066, "theoretical_loss": 3.3689291592478607, "tokens_seen": 2779512832 }, { "epoch": 0.84, "learning_rate": 0.0001592039800995025, "loss": 0.064, "theoretical_loss": 3.3689045603002024, "tokens_seen": 2779774976 }, { "epoch": 0.84, "learning_rate": 0.00015912373615792008, "loss": 0.062, "theoretical_loss": 3.3688799643216836, "tokens_seen": 2780037120 }, { "epoch": 0.84, "learning_rate": 0.0001590434922163377, "loss": 0.0655, "theoretical_loss": 3.368855371311666, "tokens_seen": 2780299264 }, { "epoch": 0.84, "learning_rate": 0.00015896324827475525, "loss": 0.0659, "theoretical_loss": 3.368830781269511, "tokens_seen": 2780561408 }, { "epoch": 0.84, "learning_rate": 0.00015888300433317283, "loss": 0.0647, "theoretical_loss": 3.3688061941945806, "tokens_seen": 2780823552 }, { "epoch": 0.84, "learning_rate": 0.00015880276039159045, "loss": 0.0649, "theoretical_loss": 3.3687816100862378, "tokens_seen": 2781085696 }, { "epoch": 0.84, "learning_rate": 0.000158722516450008, "loss": 0.0676, "theoretical_loss": 3.368757028943845, "tokens_seen": 2781347840 }, { "epoch": 0.84, "learning_rate": 0.00015864227250842562, "loss": 0.0658, "theoretical_loss": 3.3687324507667644, "tokens_seen": 2781609984 }, { "epoch": 0.84, "learning_rate": 0.0001585620285668432, "loss": 0.0655, "theoretical_loss": 3.368707875554359, "tokens_seen": 2781872128 }, { "epoch": 0.84, "learning_rate": 0.00015848178462526082, "loss": 0.0652, "theoretical_loss": 3.3686833033059917, "tokens_seen": 2782134272 }, { "epoch": 0.84, "learning_rate": 0.00015840154068367837, "loss": 0.0646, "theoretical_loss": 3.368658734021026, "tokens_seen": 2782396416 }, { "epoch": 0.84, "learning_rate": 0.00015832129674209596, "loss": 0.064, "theoretical_loss": 3.3686341676988256, "tokens_seen": 2782658560 }, { "epoch": 0.84, "learning_rate": 0.00015824105280051357, "loss": 0.0664, "theoretical_loss": 3.3686096043387543, "tokens_seen": 2782920704 }, { "epoch": 0.84, "learning_rate": 0.00015816080885893116, "loss": 0.0649, "theoretical_loss": 3.3685850439401754, "tokens_seen": 2783182848 }, { "epoch": 0.84, "learning_rate": 0.00015808056491734874, "loss": 0.0656, "theoretical_loss": 3.368560486502453, "tokens_seen": 2783444992 }, { "epoch": 0.84, "learning_rate": 0.00015800032097576633, "loss": 0.0668, "theoretical_loss": 3.3685359320249515, "tokens_seen": 2783707136 }, { "epoch": 0.84, "learning_rate": 0.00015792007703418394, "loss": 0.0625, "theoretical_loss": 3.3685113805070355, "tokens_seen": 2783969280 }, { "epoch": 0.84, "learning_rate": 0.0001578398330926015, "loss": 0.0628, "theoretical_loss": 3.3684868319480694, "tokens_seen": 2784231424 }, { "epoch": 0.84, "learning_rate": 0.00015775958915101909, "loss": 0.0654, "theoretical_loss": 3.3684622863474183, "tokens_seen": 2784493568 }, { "epoch": 0.84, "learning_rate": 0.0001576793452094367, "loss": 0.0655, "theoretical_loss": 3.368437743704447, "tokens_seen": 2784755712 }, { "epoch": 0.84, "learning_rate": 0.00015759910126785428, "loss": 0.0668, "theoretical_loss": 3.3684132040185206, "tokens_seen": 2785017856 }, { "epoch": 0.84, "objective/train/advantage_avg": 2.9359511245274916e-05, "objective/train/docs_used": 1011592, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3140312433242798, "objective/train/original_loss": 1.3140308856964111, "objective/train/theoretical_loss": 3.3683886672890053, "objective/train/tokens_used": 2805740000, "objective/train/value_avg": -0.007099151611328125, "objective/train/value_loss": 0.0002247738157166168, "objective/train/value_max": -2.7954578399658203e-05, "objective/train/value_min": -0.99365234375, "objective/train/value_reward_corr": 0.6858024849139678, "objective/train/value_std": 0.014373779296875, "objective/train/weight_avg": 1.0001325607299805, "objective/train/weighted_lm_loss": 1.3137497901916504, "objective/train/weights_max": 2.3321189880371094, "objective/train/weights_min": 0.36934465169906616, "theoretical_loss": 3.3683886672890053, "tokens_seen": 2785280000 }, { "epoch": 0.84, "learning_rate": 0.00015751885732627187, "loss": 0.0649, "theoretical_loss": 3.3683886672890053, "tokens_seen": 2785280000 }, { "epoch": 0.84, "learning_rate": 0.00015743861338468946, "loss": 0.0661, "theoretical_loss": 3.368364133515266, "tokens_seen": 2785542144 }, { "epoch": 0.84, "learning_rate": 0.00015735836944310704, "loss": 0.0642, "theoretical_loss": 3.3683396026966688, "tokens_seen": 2785804288 }, { "epoch": 0.84, "learning_rate": 0.00015727812550152465, "loss": 0.0664, "theoretical_loss": 3.368315074832579, "tokens_seen": 2786066432 }, { "epoch": 0.84, "learning_rate": 0.0001571978815599422, "loss": 0.0639, "theoretical_loss": 3.3682905499223645, "tokens_seen": 2786328576 }, { "epoch": 0.84, "learning_rate": 0.00015711763761835982, "loss": 0.0654, "theoretical_loss": 3.36826602796539, "tokens_seen": 2786590720 }, { "epoch": 0.84, "learning_rate": 0.0001570373936767774, "loss": 0.0654, "theoretical_loss": 3.368241508961023, "tokens_seen": 2786852864 }, { "epoch": 0.84, "learning_rate": 0.00015695714973519502, "loss": 0.0637, "theoretical_loss": 3.36821699290863, "tokens_seen": 2787115008 }, { "epoch": 0.84, "learning_rate": 0.00015687690579361258, "loss": 0.0665, "theoretical_loss": 3.368192479807578, "tokens_seen": 2787377152 }, { "epoch": 0.84, "learning_rate": 0.00015679666185203017, "loss": 0.0667, "theoretical_loss": 3.368167969657234, "tokens_seen": 2787639296 }, { "epoch": 0.84, "learning_rate": 0.00015671641791044778, "loss": 0.0642, "theoretical_loss": 3.368143462456966, "tokens_seen": 2787901440 }, { "epoch": 0.84, "learning_rate": 0.00015663617396886534, "loss": 0.0618, "theoretical_loss": 3.3681189582061415, "tokens_seen": 2788163584 }, { "epoch": 0.85, "learning_rate": 0.00015655593002728295, "loss": 0.0657, "theoretical_loss": 3.3680944569041276, "tokens_seen": 2788425728 }, { "epoch": 0.85, "learning_rate": 0.00015647568608570054, "loss": 0.064, "theoretical_loss": 3.3680699585502927, "tokens_seen": 2788687872 }, { "epoch": 0.85, "learning_rate": 0.00015639544214411812, "loss": 0.0629, "theoretical_loss": 3.3680454631440053, "tokens_seen": 2788950016 }, { "epoch": 0.85, "learning_rate": 0.0001563151982025357, "loss": 0.0672, "theoretical_loss": 3.368020970684633, "tokens_seen": 2789212160 }, { "epoch": 0.85, "learning_rate": 0.0001562349542609533, "loss": 0.0638, "theoretical_loss": 3.367996481171545, "tokens_seen": 2789474304 }, { "epoch": 0.85, "learning_rate": 0.0001561547103193709, "loss": 0.0678, "theoretical_loss": 3.3679719946041096, "tokens_seen": 2789736448 }, { "epoch": 0.85, "learning_rate": 0.0001560744663777885, "loss": 0.0673, "theoretical_loss": 3.367947510981696, "tokens_seen": 2789998592 }, { "epoch": 0.85, "learning_rate": 0.00015599422243620608, "loss": 0.0656, "theoretical_loss": 3.3679230303036736, "tokens_seen": 2790260736 }, { "epoch": 0.85, "learning_rate": 0.00015591397849462366, "loss": 0.0649, "theoretical_loss": 3.367898552569411, "tokens_seen": 2790522880 }, { "epoch": 0.85, "learning_rate": 0.00015583373455304125, "loss": 0.0646, "theoretical_loss": 3.3678740777782785, "tokens_seen": 2790785024 }, { "epoch": 0.85, "learning_rate": 0.00015575349061145883, "loss": 0.0647, "theoretical_loss": 3.367849605929645, "tokens_seen": 2791047168 }, { "epoch": 0.85, "learning_rate": 0.00015567324666987642, "loss": 0.0651, "theoretical_loss": 3.3678251370228813, "tokens_seen": 2791309312 }, { "epoch": 0.85, "learning_rate": 0.00015559300272829403, "loss": 0.0667, "theoretical_loss": 3.367800671057357, "tokens_seen": 2791571456 }, { "epoch": 0.85, "objective/train/advantage_avg": -2.4366694560740143e-05, "objective/train/docs_used": 1014349, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3677260875701904, "objective/train/original_loss": 1.3677259683609009, "objective/train/theoretical_loss": 3.3677762080324425, "objective/train/tokens_used": 2812293600, "objective/train/value_avg": -0.006603240966796875, "objective/train/value_loss": 0.0001228439505212009, "objective/train/value_max": -2.485513687133789e-05, "objective/train/value_min": -0.468505859375, "objective/train/value_reward_corr": 0.7589092297420921, "objective/train/value_std": 0.012939453125, "objective/train/weight_avg": 1.000034213066101, "objective/train/weighted_lm_loss": 1.3680479526519775, "objective/train/weights_max": 1.1594135761260986, "objective/train/weights_min": 0.38203245401382446, "theoretical_loss": 3.3677762080324425, "tokens_seen": 2791833600 }, { "epoch": 0.85, "learning_rate": 0.00015551275878671162, "loss": 0.0673, "theoretical_loss": 3.3677762080324425, "tokens_seen": 2791833600 }, { "epoch": 0.85, "learning_rate": 0.00015543251484512917, "loss": 0.0626, "theoretical_loss": 3.3677517479475085, "tokens_seen": 2792095744 }, { "epoch": 0.85, "learning_rate": 0.0001553522709035468, "loss": 0.0664, "theoretical_loss": 3.367727290801925, "tokens_seen": 2792357888 }, { "epoch": 0.85, "learning_rate": 0.00015527202696196437, "loss": 0.0667, "theoretical_loss": 3.367702836595064, "tokens_seen": 2792620032 }, { "epoch": 0.85, "learning_rate": 0.00015519178302038198, "loss": 0.0654, "theoretical_loss": 3.367678385326296, "tokens_seen": 2792882176 }, { "epoch": 0.85, "learning_rate": 0.00015511153907879954, "loss": 0.0642, "theoretical_loss": 3.3676539369949916, "tokens_seen": 2793144320 }, { "epoch": 0.85, "learning_rate": 0.00015503129513721716, "loss": 0.064, "theoretical_loss": 3.367629491600524, "tokens_seen": 2793406464 }, { "epoch": 0.85, "learning_rate": 0.00015495105119563474, "loss": 0.0648, "theoretical_loss": 3.3676050491422624, "tokens_seen": 2793668608 }, { "epoch": 0.85, "learning_rate": 0.0001548708072540523, "loss": 0.0643, "theoretical_loss": 3.367580609619581, "tokens_seen": 2793930752 }, { "epoch": 0.85, "learning_rate": 0.0001547905633124699, "loss": 0.0645, "theoretical_loss": 3.3675561730318506, "tokens_seen": 2794192896 }, { "epoch": 0.85, "learning_rate": 0.0001547103193708875, "loss": 0.0656, "theoretical_loss": 3.3675317393784434, "tokens_seen": 2794455040 }, { "epoch": 0.85, "learning_rate": 0.0001546300754293051, "loss": 0.0663, "theoretical_loss": 3.3675073086587326, "tokens_seen": 2794717184 }, { "epoch": 0.85, "learning_rate": 0.00015454983148772267, "loss": 0.0659, "theoretical_loss": 3.3674828808720902, "tokens_seen": 2794979328 }, { "epoch": 0.85, "learning_rate": 0.00015446958754614025, "loss": 0.0619, "theoretical_loss": 3.3674584560178893, "tokens_seen": 2795241472 }, { "epoch": 0.85, "learning_rate": 0.00015438934360455787, "loss": 0.0667, "theoretical_loss": 3.3674340340955027, "tokens_seen": 2795503616 }, { "epoch": 0.85, "learning_rate": 0.00015430909966297545, "loss": 0.0655, "theoretical_loss": 3.367409615104304, "tokens_seen": 2795765760 }, { "epoch": 0.85, "learning_rate": 0.00015422885572139304, "loss": 0.0646, "theoretical_loss": 3.3673851990436656, "tokens_seen": 2796027904 }, { "epoch": 0.85, "learning_rate": 0.00015414861177981062, "loss": 0.0646, "theoretical_loss": 3.3673607859129624, "tokens_seen": 2796290048 }, { "epoch": 0.85, "learning_rate": 0.00015406836783822824, "loss": 0.0637, "theoretical_loss": 3.3673363757115675, "tokens_seen": 2796552192 }, { "epoch": 0.85, "learning_rate": 0.0001539881238966458, "loss": 0.0666, "theoretical_loss": 3.367311968438855, "tokens_seen": 2796814336 }, { "epoch": 0.85, "learning_rate": 0.00015390787995506338, "loss": 0.066, "theoretical_loss": 3.367287564094199, "tokens_seen": 2797076480 }, { "epoch": 0.85, "learning_rate": 0.000153827636013481, "loss": 0.0663, "theoretical_loss": 3.3672631626769736, "tokens_seen": 2797338624 }, { "epoch": 0.85, "learning_rate": 0.00015374739207189858, "loss": 0.0667, "theoretical_loss": 3.367238764186554, "tokens_seen": 2797600768 }, { "epoch": 0.85, "learning_rate": 0.00015366714813031616, "loss": 0.0665, "theoretical_loss": 3.3672143686223146, "tokens_seen": 2797862912 }, { "epoch": 0.85, "learning_rate": 0.00015358690418873375, "loss": 0.066, "theoretical_loss": 3.36718997598363, "tokens_seen": 2798125056 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.0007134893676266074, "objective/train/docs_used": 1016369, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2880847454071045, "objective/train/original_loss": 1.288084626197815, "objective/train/theoretical_loss": 3.367165586269876, "objective/train/tokens_used": 2818847200, "objective/train/value_avg": -0.00850677490234375, "objective/train/value_loss": 0.0001657002285355702, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.268798828125, "objective/train/value_reward_corr": 0.7366273106085361, "objective/train/value_std": 0.01476287841796875, "objective/train/weight_avg": 1.0007914304733276, "objective/train/weighted_lm_loss": 1.2886732816696167, "objective/train/weights_max": 1.2570312023162842, "objective/train/weights_min": 0.37699440121650696, "theoretical_loss": 3.367165586269876, "tokens_seen": 2798387200 }, { "epoch": 0.85, "learning_rate": 0.00015350666024715133, "loss": 0.0639, "theoretical_loss": 3.367165586269876, "tokens_seen": 2798387200 }, { "epoch": 0.85, "learning_rate": 0.00015342641630556895, "loss": 0.0654, "theoretical_loss": 3.3671411994804274, "tokens_seen": 2798649344 }, { "epoch": 0.85, "learning_rate": 0.0001533461723639865, "loss": 0.0621, "theoretical_loss": 3.36711681561466, "tokens_seen": 2798911488 }, { "epoch": 0.85, "learning_rate": 0.00015326592842240412, "loss": 0.0655, "theoretical_loss": 3.3670924346719495, "tokens_seen": 2799173632 }, { "epoch": 0.85, "learning_rate": 0.0001531856844808217, "loss": 0.0655, "theoretical_loss": 3.3670680566516715, "tokens_seen": 2799435776 }, { "epoch": 0.85, "learning_rate": 0.0001531054405392393, "loss": 0.0628, "theoretical_loss": 3.367043681553202, "tokens_seen": 2799697920 }, { "epoch": 0.85, "learning_rate": 0.00015302519659765687, "loss": 0.0683, "theoretical_loss": 3.367019309375918, "tokens_seen": 2799960064 }, { "epoch": 0.85, "learning_rate": 0.00015294495265607446, "loss": 0.0665, "theoretical_loss": 3.3669949401191954, "tokens_seen": 2800222208 }, { "epoch": 0.85, "learning_rate": 0.00015286470871449207, "loss": 0.0651, "theoretical_loss": 3.366970573782411, "tokens_seen": 2800484352 }, { "epoch": 0.85, "learning_rate": 0.00015278446477290963, "loss": 0.0624, "theoretical_loss": 3.366946210364942, "tokens_seen": 2800746496 }, { "epoch": 0.85, "learning_rate": 0.00015270422083132724, "loss": 0.0657, "theoretical_loss": 3.3669218498661646, "tokens_seen": 2801008640 }, { "epoch": 0.85, "learning_rate": 0.00015262397688974483, "loss": 0.0643, "theoretical_loss": 3.3668974922854567, "tokens_seen": 2801270784 }, { "epoch": 0.85, "learning_rate": 0.00015254373294816242, "loss": 0.0685, "theoretical_loss": 3.366873137622196, "tokens_seen": 2801532928 }, { "epoch": 0.85, "learning_rate": 0.00015246348900658, "loss": 0.0679, "theoretical_loss": 3.366848785875759, "tokens_seen": 2801795072 }, { "epoch": 0.85, "learning_rate": 0.00015238324506499759, "loss": 0.0669, "theoretical_loss": 3.3668244370455245, "tokens_seen": 2802057216 }, { "epoch": 0.85, "learning_rate": 0.0001523030011234152, "loss": 0.0663, "theoretical_loss": 3.3668000911308704, "tokens_seen": 2802319360 }, { "epoch": 0.85, "learning_rate": 0.00015222275718183278, "loss": 0.0625, "theoretical_loss": 3.3667757481311744, "tokens_seen": 2802581504 }, { "epoch": 0.85, "learning_rate": 0.00015214251324025037, "loss": 0.064, "theoretical_loss": 3.3667514080458156, "tokens_seen": 2802843648 }, { "epoch": 0.85, "learning_rate": 0.00015206226929866796, "loss": 0.0668, "theoretical_loss": 3.3667270708741714, "tokens_seen": 2803105792 }, { "epoch": 0.85, "learning_rate": 0.00015198202535708554, "loss": 0.0659, "theoretical_loss": 3.366702736615622, "tokens_seen": 2803367936 }, { "epoch": 0.85, "learning_rate": 0.00015190178141550313, "loss": 0.0663, "theoretical_loss": 3.3666784052695453, "tokens_seen": 2803630080 }, { "epoch": 0.85, "learning_rate": 0.0001518215374739207, "loss": 0.0632, "theoretical_loss": 3.366654076835321, "tokens_seen": 2803892224 }, { "epoch": 0.85, "learning_rate": 0.00015174129353233832, "loss": 0.0671, "theoretical_loss": 3.366629751312328, "tokens_seen": 2804154368 }, { "epoch": 0.85, "learning_rate": 0.0001516610495907559, "loss": 0.0648, "theoretical_loss": 3.3666054286999465, "tokens_seen": 2804416512 }, { "epoch": 0.85, "learning_rate": 0.00015158080564917347, "loss": 0.0667, "theoretical_loss": 3.3665811089975555, "tokens_seen": 2804678656 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.00020279214368201792, "objective/train/docs_used": 1018816, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3492354154586792, "objective/train/original_loss": 1.3492355346679688, "objective/train/theoretical_loss": 3.366556792204535, "objective/train/tokens_used": 2825400800, "objective/train/value_avg": -0.008026123046875, "objective/train/value_loss": 0.00025738292606547475, "objective/train/value_max": -3.1948089599609375e-05, "objective/train/value_min": -0.366455078125, "objective/train/value_reward_corr": 0.7410373552381239, "objective/train/value_std": 0.017486572265625, "objective/train/weight_avg": 1.0003200769424438, "objective/train/weighted_lm_loss": 1.3495210409164429, "objective/train/weights_max": 1.3756163120269775, "objective/train/weights_min": 0.3690573275089264, "theoretical_loss": 3.366556792204535, "tokens_seen": 2804940800 }, { "epoch": 0.85, "learning_rate": 0.00015150056170759108, "loss": 0.0646, "theoretical_loss": 3.366556792204535, "tokens_seen": 2804940800 }, { "epoch": 0.85, "learning_rate": 0.00015142031776600867, "loss": 0.0661, "theoretical_loss": 3.3665324783202655, "tokens_seen": 2805202944 }, { "epoch": 0.85, "learning_rate": 0.00015134007382442628, "loss": 0.0652, "theoretical_loss": 3.3665081673441275, "tokens_seen": 2805465088 }, { "epoch": 0.85, "learning_rate": 0.00015125982988284384, "loss": 0.0665, "theoretical_loss": 3.3664838592755006, "tokens_seen": 2805727232 }, { "epoch": 0.85, "learning_rate": 0.00015117958594126145, "loss": 0.0663, "theoretical_loss": 3.366459554113766, "tokens_seen": 2805989376 }, { "epoch": 0.85, "learning_rate": 0.00015109934199967904, "loss": 0.0642, "theoretical_loss": 3.3664352518583045, "tokens_seen": 2806251520 }, { "epoch": 0.85, "learning_rate": 0.0001510190980580966, "loss": 0.0645, "theoretical_loss": 3.366410952508497, "tokens_seen": 2806513664 }, { "epoch": 0.85, "learning_rate": 0.0001509388541165142, "loss": 0.0639, "theoretical_loss": 3.366386656063725, "tokens_seen": 2806775808 }, { "epoch": 0.85, "learning_rate": 0.0001508586101749318, "loss": 0.0663, "theoretical_loss": 3.3663623625233696, "tokens_seen": 2807037952 }, { "epoch": 0.85, "learning_rate": 0.0001507783662333494, "loss": 0.061, "theoretical_loss": 3.3663380718868128, "tokens_seen": 2807300096 }, { "epoch": 0.85, "learning_rate": 0.00015069812229176696, "loss": 0.0669, "theoretical_loss": 3.3663137841534363, "tokens_seen": 2807562240 }, { "epoch": 0.85, "learning_rate": 0.00015061787835018455, "loss": 0.0646, "theoretical_loss": 3.3662894993226216, "tokens_seen": 2807824384 }, { "epoch": 0.85, "learning_rate": 0.00015053763440860216, "loss": 0.0665, "theoretical_loss": 3.3662652173937517, "tokens_seen": 2808086528 }, { "epoch": 0.85, "learning_rate": 0.00015045739046701975, "loss": 0.0671, "theoretical_loss": 3.366240938366208, "tokens_seen": 2808348672 }, { "epoch": 0.85, "learning_rate": 0.00015037714652543733, "loss": 0.065, "theoretical_loss": 3.366216662239374, "tokens_seen": 2808610816 }, { "epoch": 0.85, "learning_rate": 0.00015029690258385492, "loss": 0.0644, "theoretical_loss": 3.3661923890126326, "tokens_seen": 2808872960 }, { "epoch": 0.85, "learning_rate": 0.00015021665864227253, "loss": 0.0662, "theoretical_loss": 3.366168118685365, "tokens_seen": 2809135104 }, { "epoch": 0.85, "learning_rate": 0.0001501364147006901, "loss": 0.0639, "theoretical_loss": 3.3661438512569566, "tokens_seen": 2809397248 }, { "epoch": 0.85, "learning_rate": 0.00015005617075910767, "loss": 0.0665, "theoretical_loss": 3.3661195867267892, "tokens_seen": 2809659392 }, { "epoch": 0.85, "learning_rate": 0.0001499759268175253, "loss": 0.0631, "theoretical_loss": 3.3660953250942467, "tokens_seen": 2809921536 }, { "epoch": 0.85, "learning_rate": 0.00014989568287594287, "loss": 0.0631, "theoretical_loss": 3.366071066358713, "tokens_seen": 2810183680 }, { "epoch": 0.85, "learning_rate": 0.00014981543893436046, "loss": 0.0628, "theoretical_loss": 3.3660468105195713, "tokens_seen": 2810445824 }, { "epoch": 0.85, "learning_rate": 0.00014973519499277804, "loss": 0.0618, "theoretical_loss": 3.3660225575762066, "tokens_seen": 2810707968 }, { "epoch": 0.85, "learning_rate": 0.00014965495105119563, "loss": 0.0654, "theoretical_loss": 3.365998307528003, "tokens_seen": 2810970112 }, { "epoch": 0.85, "learning_rate": 0.00014957470710961324, "loss": 0.0684, "theoretical_loss": 3.3659740603743447, "tokens_seen": 2811232256 }, { "epoch": 0.85, "objective/train/advantage_avg": 2.8910437322338112e-05, "objective/train/docs_used": 1021297, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2576926946640015, "objective/train/original_loss": 1.2576926946640015, "objective/train/theoretical_loss": 3.365949816114616, "objective/train/tokens_used": 2831954400, "objective/train/value_avg": -0.00836181640625, "objective/train/value_loss": 0.0002257737360196188, "objective/train/value_max": -2.300739288330078e-05, "objective/train/value_min": -0.485595703125, "objective/train/value_reward_corr": 0.7908450645758498, "objective/train/value_std": 0.0178680419921875, "objective/train/weight_avg": 1.0001318454742432, "objective/train/weighted_lm_loss": 1.2580550909042358, "objective/train/weights_max": 1.375653624534607, "objective/train/weights_min": 0.3706671893596649, "theoretical_loss": 3.365949816114616, "tokens_seen": 2811494400 }, { "epoch": 0.85, "learning_rate": 0.0001494944631680308, "loss": 0.0671, "theoretical_loss": 3.365949816114616, "tokens_seen": 2811494400 }, { "epoch": 0.85, "learning_rate": 0.0001494142192264484, "loss": 0.0656, "theoretical_loss": 3.3659255747482018, "tokens_seen": 2811756544 }, { "epoch": 0.85, "learning_rate": 0.000149333975284866, "loss": 0.0634, "theoretical_loss": 3.365901336274488, "tokens_seen": 2812018688 }, { "epoch": 0.85, "learning_rate": 0.00014925373134328358, "loss": 0.0651, "theoretical_loss": 3.3658771006928587, "tokens_seen": 2812280832 }, { "epoch": 0.85, "learning_rate": 0.00014917348740170117, "loss": 0.0655, "theoretical_loss": 3.3658528680027002, "tokens_seen": 2812542976 }, { "epoch": 0.85, "learning_rate": 0.00014909324346011875, "loss": 0.0657, "theoretical_loss": 3.3658286382033973, "tokens_seen": 2812805120 }, { "epoch": 0.85, "learning_rate": 0.00014901299951853637, "loss": 0.0652, "theoretical_loss": 3.365804411294336, "tokens_seen": 2813067264 }, { "epoch": 0.85, "learning_rate": 0.00014893275557695393, "loss": 0.0662, "theoretical_loss": 3.3657801872749027, "tokens_seen": 2813329408 }, { "epoch": 0.85, "learning_rate": 0.00014885251163537154, "loss": 0.0646, "theoretical_loss": 3.3657559661444827, "tokens_seen": 2813591552 }, { "epoch": 0.85, "learning_rate": 0.00014877226769378912, "loss": 0.0639, "theoretical_loss": 3.3657317479024633, "tokens_seen": 2813853696 }, { "epoch": 0.85, "learning_rate": 0.00014869202375220674, "loss": 0.0641, "theoretical_loss": 3.3657075325482304, "tokens_seen": 2814115840 }, { "epoch": 0.85, "learning_rate": 0.0001486117798106243, "loss": 0.0652, "theoretical_loss": 3.3656833200811707, "tokens_seen": 2814377984 }, { "epoch": 0.85, "learning_rate": 0.00014853153586904188, "loss": 0.0669, "theoretical_loss": 3.3656591105006712, "tokens_seen": 2814640128 }, { "epoch": 0.85, "learning_rate": 0.0001484512919274595, "loss": 0.0661, "theoretical_loss": 3.3656349038061193, "tokens_seen": 2814902272 }, { "epoch": 0.85, "learning_rate": 0.00014837104798587705, "loss": 0.0641, "theoretical_loss": 3.3656106999969015, "tokens_seen": 2815164416 }, { "epoch": 0.85, "learning_rate": 0.00014829080404429466, "loss": 0.0645, "theoretical_loss": 3.365586499072406, "tokens_seen": 2815426560 }, { "epoch": 0.85, "learning_rate": 0.00014821056010271225, "loss": 0.0681, "theoretical_loss": 3.36556230103202, "tokens_seen": 2815688704 }, { "epoch": 0.85, "learning_rate": 0.00014813031616112983, "loss": 0.0642, "theoretical_loss": 3.3655381058751317, "tokens_seen": 2815950848 }, { "epoch": 0.85, "learning_rate": 0.00014805007221954742, "loss": 0.0678, "theoretical_loss": 3.3655139136011285, "tokens_seen": 2816212992 }, { "epoch": 0.85, "learning_rate": 0.000147969828277965, "loss": 0.0675, "theoretical_loss": 3.3654897242093993, "tokens_seen": 2816475136 }, { "epoch": 0.85, "learning_rate": 0.00014788958433638262, "loss": 0.0641, "theoretical_loss": 3.365465537699332, "tokens_seen": 2816737280 }, { "epoch": 0.85, "learning_rate": 0.0001478093403948002, "loss": 0.0676, "theoretical_loss": 3.3654413540703154, "tokens_seen": 2816999424 }, { "epoch": 0.85, "learning_rate": 0.0001477290964532178, "loss": 0.0669, "theoretical_loss": 3.365417173321738, "tokens_seen": 2817261568 }, { "epoch": 0.85, "learning_rate": 0.00014764885251163538, "loss": 0.067, "theoretical_loss": 3.3653929954529893, "tokens_seen": 2817523712 }, { "epoch": 0.85, "learning_rate": 0.00014756860857005296, "loss": 0.0669, "theoretical_loss": 3.3653688204634578, "tokens_seen": 2817785856 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.0007476872997358441, "objective/train/docs_used": 1024155, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2878682613372803, "objective/train/original_loss": 1.2878682613372803, "objective/train/theoretical_loss": 3.3653446483525333, "objective/train/tokens_used": 2838508000, "objective/train/value_avg": -0.0068511962890625, "objective/train/value_loss": 0.0002798317582346499, "objective/train/value_max": -3.1948089599609375e-05, "objective/train/value_min": -0.99462890625, "objective/train/value_reward_corr": 0.7159308124042012, "objective/train/value_std": 0.0169219970703125, "objective/train/weight_avg": 1.0008704662322998, "objective/train/weighted_lm_loss": 1.2883635759353638, "objective/train/weights_max": 1.835062026977539, "objective/train/weights_min": 0.3006945848464966, "theoretical_loss": 3.3653446483525333, "tokens_seen": 2818048000 }, { "epoch": 0.85, "learning_rate": 0.00014748836462847057, "loss": 0.0658, "theoretical_loss": 3.3653446483525333, "tokens_seen": 2818048000 }, { "epoch": 0.85, "learning_rate": 0.00014740812068688813, "loss": 0.0668, "theoretical_loss": 3.365320479119605, "tokens_seen": 2818310144 }, { "epoch": 0.85, "learning_rate": 0.00014732787674530574, "loss": 0.0656, "theoretical_loss": 3.3652963127640625, "tokens_seen": 2818572288 }, { "epoch": 0.85, "learning_rate": 0.00014724763280372333, "loss": 0.0665, "theoretical_loss": 3.365272149285296, "tokens_seen": 2818834432 }, { "epoch": 0.85, "learning_rate": 0.0001471673888621409, "loss": 0.0685, "theoretical_loss": 3.3652479886826954, "tokens_seen": 2819096576 }, { "epoch": 0.85, "learning_rate": 0.0001470871449205585, "loss": 0.0635, "theoretical_loss": 3.3652238309556513, "tokens_seen": 2819358720 }, { "epoch": 0.85, "learning_rate": 0.00014700690097897609, "loss": 0.0673, "theoretical_loss": 3.365199676103553, "tokens_seen": 2819620864 }, { "epoch": 0.85, "learning_rate": 0.0001469266570373937, "loss": 0.067, "theoretical_loss": 3.3651755241257924, "tokens_seen": 2819883008 }, { "epoch": 0.85, "learning_rate": 0.00014684641309581126, "loss": 0.063, "theoretical_loss": 3.36515137502176, "tokens_seen": 2820145152 }, { "epoch": 0.85, "learning_rate": 0.00014676616915422887, "loss": 0.0672, "theoretical_loss": 3.3651272287908465, "tokens_seen": 2820407296 }, { "epoch": 0.85, "learning_rate": 0.00014668592521264646, "loss": 0.064, "theoretical_loss": 3.365103085432443, "tokens_seen": 2820669440 }, { "epoch": 0.85, "learning_rate": 0.00014660568127106404, "loss": 0.0651, "theoretical_loss": 3.365078944945941, "tokens_seen": 2820931584 }, { "epoch": 0.85, "learning_rate": 0.00014652543732948163, "loss": 0.0643, "theoretical_loss": 3.3650548073307327, "tokens_seen": 2821193728 }, { "epoch": 0.86, "learning_rate": 0.0001464451933878992, "loss": 0.0682, "theoretical_loss": 3.365030672586209, "tokens_seen": 2821455872 }, { "epoch": 0.86, "learning_rate": 0.00014636494944631682, "loss": 0.065, "theoretical_loss": 3.3650065407117618, "tokens_seen": 2821718016 }, { "epoch": 0.86, "learning_rate": 0.00014628470550473438, "loss": 0.0646, "theoretical_loss": 3.3649824117067832, "tokens_seen": 2821980160 }, { "epoch": 0.86, "learning_rate": 0.00014620446156315197, "loss": 0.067, "theoretical_loss": 3.3649582855706663, "tokens_seen": 2822242304 }, { "epoch": 0.86, "learning_rate": 0.00014612421762156958, "loss": 0.0647, "theoretical_loss": 3.364934162302803, "tokens_seen": 2822504448 }, { "epoch": 0.86, "learning_rate": 0.00014604397367998717, "loss": 0.0665, "theoretical_loss": 3.3649100419025855, "tokens_seen": 2822766592 }, { "epoch": 0.86, "learning_rate": 0.00014596372973840475, "loss": 0.0647, "theoretical_loss": 3.364885924369408, "tokens_seen": 2823028736 }, { "epoch": 0.86, "learning_rate": 0.00014588348579682234, "loss": 0.064, "theoretical_loss": 3.3648618097026617, "tokens_seen": 2823290880 }, { "epoch": 0.86, "learning_rate": 0.00014580324185523995, "loss": 0.0654, "theoretical_loss": 3.3648376979017414, "tokens_seen": 2823553024 }, { "epoch": 0.86, "learning_rate": 0.00014572299791365754, "loss": 0.0654, "theoretical_loss": 3.3648135889660398, "tokens_seen": 2823815168 }, { "epoch": 0.86, "learning_rate": 0.0001456427539720751, "loss": 0.0633, "theoretical_loss": 3.3647894828949503, "tokens_seen": 2824077312 }, { "epoch": 0.86, "learning_rate": 0.0001455625100304927, "loss": 0.0663, "theoretical_loss": 3.364765379687867, "tokens_seen": 2824339456 }, { "epoch": 0.86, "objective/train/advantage_avg": -0.0002453245688229799, "objective/train/docs_used": 1026596, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.294011116027832, "objective/train/original_loss": 1.2940109968185425, "objective/train/theoretical_loss": 3.3647412793441838, "objective/train/tokens_used": 2845061600, "objective/train/value_avg": -0.00684356689453125, "objective/train/value_loss": 0.0001892550935735926, "objective/train/value_max": -3.170967102050781e-05, "objective/train/value_min": -0.68603515625, "objective/train/value_reward_corr": 0.7367796299909823, "objective/train/value_std": 0.01493072509765625, "objective/train/weight_avg": 0.9998413920402527, "objective/train/weighted_lm_loss": 1.2936891317367554, "objective/train/weights_max": 1.3273746967315674, "objective/train/weights_min": 0.36991289258003235, "theoretical_loss": 3.3647412793441838, "tokens_seen": 2824601600 }, { "epoch": 0.86, "learning_rate": 0.0001454822660889103, "loss": 0.0631, "theoretical_loss": 3.3647412793441838, "tokens_seen": 2824601600 }, { "epoch": 0.86, "learning_rate": 0.00014540202214732788, "loss": 0.0628, "theoretical_loss": 3.364717181863295, "tokens_seen": 2824863744 }, { "epoch": 0.86, "learning_rate": 0.00014532177820574546, "loss": 0.064, "theoretical_loss": 3.3646930872445946, "tokens_seen": 2825125888 }, { "epoch": 0.86, "learning_rate": 0.00014524153426416305, "loss": 0.0642, "theoretical_loss": 3.3646689954874773, "tokens_seen": 2825388032 }, { "epoch": 0.86, "learning_rate": 0.00014516129032258066, "loss": 0.0664, "theoretical_loss": 3.3646449065913377, "tokens_seen": 2825650176 }, { "epoch": 0.86, "learning_rate": 0.00014508104638099822, "loss": 0.0633, "theoretical_loss": 3.364620820555571, "tokens_seen": 2825912320 }, { "epoch": 0.86, "learning_rate": 0.00014500080243941583, "loss": 0.0634, "theoretical_loss": 3.3645967373795718, "tokens_seen": 2826174464 }, { "epoch": 0.86, "learning_rate": 0.00014492055849783342, "loss": 0.0678, "theoretical_loss": 3.3645726570627357, "tokens_seen": 2826436608 }, { "epoch": 0.86, "learning_rate": 0.00014484031455625103, "loss": 0.065, "theoretical_loss": 3.3645485796044574, "tokens_seen": 2826698752 }, { "epoch": 0.86, "learning_rate": 0.0001447600706146686, "loss": 0.0638, "theoretical_loss": 3.3645245050041335, "tokens_seen": 2826960896 }, { "epoch": 0.86, "learning_rate": 0.00014467982667308617, "loss": 0.0645, "theoretical_loss": 3.364500433261159, "tokens_seen": 2827223040 }, { "epoch": 0.86, "learning_rate": 0.0001445995827315038, "loss": 0.0639, "theoretical_loss": 3.3644763643749305, "tokens_seen": 2827485184 }, { "epoch": 0.86, "learning_rate": 0.00014451933878992135, "loss": 0.0655, "theoretical_loss": 3.364452298344844, "tokens_seen": 2827747328 }, { "epoch": 0.86, "learning_rate": 0.00014443909484833896, "loss": 0.067, "theoretical_loss": 3.3644282351702954, "tokens_seen": 2828009472 }, { "epoch": 0.86, "learning_rate": 0.00014435885090675654, "loss": 0.0643, "theoretical_loss": 3.364404174850682, "tokens_seen": 2828271616 }, { "epoch": 0.86, "learning_rate": 0.00014427860696517413, "loss": 0.065, "theoretical_loss": 3.3643801173853998, "tokens_seen": 2828533760 }, { "epoch": 0.86, "learning_rate": 0.00014419836302359171, "loss": 0.0642, "theoretical_loss": 3.364356062773846, "tokens_seen": 2828795904 }, { "epoch": 0.86, "learning_rate": 0.0001441181190820093, "loss": 0.0676, "theoretical_loss": 3.3643320110154176, "tokens_seen": 2829058048 }, { "epoch": 0.86, "learning_rate": 0.0001440378751404269, "loss": 0.0668, "theoretical_loss": 3.364307962109512, "tokens_seen": 2829320192 }, { "epoch": 0.86, "learning_rate": 0.0001439576311988445, "loss": 0.0652, "theoretical_loss": 3.364283916055527, "tokens_seen": 2829582336 }, { "epoch": 0.86, "learning_rate": 0.00014387738725726208, "loss": 0.0635, "theoretical_loss": 3.364259872852859, "tokens_seen": 2829844480 }, { "epoch": 0.86, "learning_rate": 0.00014379714331567967, "loss": 0.0648, "theoretical_loss": 3.364235832500907, "tokens_seen": 2830106624 }, { "epoch": 0.86, "learning_rate": 0.00014371689937409725, "loss": 0.0645, "theoretical_loss": 3.3642117949990684, "tokens_seen": 2830368768 }, { "epoch": 0.86, "learning_rate": 0.00014363665543251484, "loss": 0.064, "theoretical_loss": 3.364187760346742, "tokens_seen": 2830630912 }, { "epoch": 0.86, "learning_rate": 0.00014355641149093243, "loss": 0.0636, "theoretical_loss": 3.3641637285433257, "tokens_seen": 2830893056 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.0009019442368298769, "objective/train/docs_used": 1029215, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2658255100250244, "objective/train/original_loss": 1.2658252716064453, "objective/train/theoretical_loss": 3.364139699588218, "objective/train/tokens_used": 2851615200, "objective/train/value_avg": -0.0078277587890625, "objective/train/value_loss": 0.0002247198426630348, "objective/train/value_max": -1.9371509552001953e-05, "objective/train/value_min": -0.9765625, "objective/train/value_reward_corr": 0.6742154938115128, "objective/train/value_std": 0.01406097412109375, "objective/train/weight_avg": 1.0009979009628296, "objective/train/weighted_lm_loss": 1.266892671585083, "objective/train/weights_max": 1.2235815525054932, "objective/train/weights_min": 0.24483628571033478, "theoretical_loss": 3.364139699588218, "tokens_seen": 2831155200 }, { "epoch": 0.86, "learning_rate": 0.00014347616754935004, "loss": 0.065, "theoretical_loss": 3.364139699588218, "tokens_seen": 2831155200 }, { "epoch": 0.86, "learning_rate": 0.00014339592360776762, "loss": 0.0651, "theoretical_loss": 3.3641156734808173, "tokens_seen": 2831417344 }, { "epoch": 0.86, "learning_rate": 0.00014331567966618518, "loss": 0.0662, "theoretical_loss": 3.364091650220523, "tokens_seen": 2831679488 }, { "epoch": 0.86, "learning_rate": 0.0001432354357246028, "loss": 0.0643, "theoretical_loss": 3.3640676298067347, "tokens_seen": 2831941632 }, { "epoch": 0.86, "learning_rate": 0.00014315519178302038, "loss": 0.0662, "theoretical_loss": 3.3640436122388504, "tokens_seen": 2832203776 }, { "epoch": 0.86, "learning_rate": 0.000143074947841438, "loss": 0.0661, "theoretical_loss": 3.364019597516271, "tokens_seen": 2832465920 }, { "epoch": 0.86, "learning_rate": 0.00014299470389985555, "loss": 0.064, "theoretical_loss": 3.3639955856383947, "tokens_seen": 2832728064 }, { "epoch": 0.86, "learning_rate": 0.00014291445995827316, "loss": 0.0676, "theoretical_loss": 3.363971576604622, "tokens_seen": 2832990208 }, { "epoch": 0.86, "learning_rate": 0.00014283421601669075, "loss": 0.0664, "theoretical_loss": 3.3639475704143535, "tokens_seen": 2833252352 }, { "epoch": 0.86, "learning_rate": 0.00014275397207510833, "loss": 0.0653, "theoretical_loss": 3.3639235670669883, "tokens_seen": 2833514496 }, { "epoch": 0.86, "learning_rate": 0.00014267372813352592, "loss": 0.0666, "theoretical_loss": 3.3638995665619276, "tokens_seen": 2833776640 }, { "epoch": 0.86, "learning_rate": 0.0001425934841919435, "loss": 0.0646, "theoretical_loss": 3.363875568898571, "tokens_seen": 2834038784 }, { "epoch": 0.86, "learning_rate": 0.00014251324025036112, "loss": 0.0646, "theoretical_loss": 3.36385157407632, "tokens_seen": 2834300928 }, { "epoch": 0.86, "learning_rate": 0.00014243299630877868, "loss": 0.0637, "theoretical_loss": 3.3638275820945758, "tokens_seen": 2834563072 }, { "epoch": 0.86, "learning_rate": 0.00014235275236719626, "loss": 0.0651, "theoretical_loss": 3.3638035929527392, "tokens_seen": 2834825216 }, { "epoch": 0.86, "learning_rate": 0.00014227250842561388, "loss": 0.0659, "theoretical_loss": 3.3637796066502106, "tokens_seen": 2835087360 }, { "epoch": 0.86, "learning_rate": 0.00014219226448403146, "loss": 0.0657, "theoretical_loss": 3.3637556231863925, "tokens_seen": 2835349504 }, { "epoch": 0.86, "learning_rate": 0.00014211202054244905, "loss": 0.0651, "theoretical_loss": 3.363731642560686, "tokens_seen": 2835611648 }, { "epoch": 0.86, "learning_rate": 0.00014203177660086663, "loss": 0.0677, "theoretical_loss": 3.3637076647724937, "tokens_seen": 2835873792 }, { "epoch": 0.86, "learning_rate": 0.00014195153265928424, "loss": 0.0673, "theoretical_loss": 3.3636836898212166, "tokens_seen": 2836135936 }, { "epoch": 0.86, "learning_rate": 0.00014187128871770183, "loss": 0.066, "theoretical_loss": 3.3636597177062573, "tokens_seen": 2836398080 }, { "epoch": 0.86, "learning_rate": 0.0001417910447761194, "loss": 0.0662, "theoretical_loss": 3.363635748427018, "tokens_seen": 2836660224 }, { "epoch": 0.86, "learning_rate": 0.000141710800834537, "loss": 0.0682, "theoretical_loss": 3.363611781982902, "tokens_seen": 2836922368 }, { "epoch": 0.86, "learning_rate": 0.00014163055689295459, "loss": 0.0651, "theoretical_loss": 3.3635878183733112, "tokens_seen": 2837184512 }, { "epoch": 0.86, "learning_rate": 0.00014155031295137217, "loss": 0.0662, "theoretical_loss": 3.3635638575976485, "tokens_seen": 2837446656 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.0008682442712597549, "objective/train/docs_used": 1031594, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2491042613983154, "objective/train/original_loss": 1.2491042613983154, "objective/train/theoretical_loss": 3.3635398996553176, "objective/train/tokens_used": 2858168800, "objective/train/value_avg": -0.0068206787109375, "objective/train/value_loss": 0.00021514028776437044, "objective/train/value_max": -3.9458274841308594e-05, "objective/train/value_min": -0.81103515625, "objective/train/value_reward_corr": 0.643333056599688, "objective/train/value_std": 0.01387786865234375, "objective/train/weight_avg": 1.0009657144546509, "objective/train/weighted_lm_loss": 1.2500951290130615, "objective/train/weights_max": 1.7786227464675903, "objective/train/weights_min": 0.3690904378890991, "theoretical_loss": 3.3635398996553176, "tokens_seen": 2837708800 }, { "epoch": 0.86, "learning_rate": 0.00014147006900978976, "loss": 0.0652, "theoretical_loss": 3.3635398996553176, "tokens_seen": 2837708800 }, { "epoch": 0.86, "learning_rate": 0.00014138982506820734, "loss": 0.067, "theoretical_loss": 3.3635159445457212, "tokens_seen": 2837970944 }, { "epoch": 0.86, "learning_rate": 0.00014130958112662496, "loss": 0.0647, "theoretical_loss": 3.363491992268263, "tokens_seen": 2838233088 }, { "epoch": 0.86, "learning_rate": 0.00014122933718504251, "loss": 0.0644, "theoretical_loss": 3.3634680428223467, "tokens_seen": 2838495232 }, { "epoch": 0.86, "learning_rate": 0.00014114909324346013, "loss": 0.0611, "theoretical_loss": 3.3634440962073757, "tokens_seen": 2838757376 }, { "epoch": 0.86, "learning_rate": 0.0001410688493018777, "loss": 0.0642, "theoretical_loss": 3.3634201524227545, "tokens_seen": 2839019520 }, { "epoch": 0.86, "learning_rate": 0.00014098860536029532, "loss": 0.0665, "theoretical_loss": 3.363396211467887, "tokens_seen": 2839281664 }, { "epoch": 0.86, "learning_rate": 0.00014090836141871288, "loss": 0.0655, "theoretical_loss": 3.3633722733421774, "tokens_seen": 2839543808 }, { "epoch": 0.86, "learning_rate": 0.00014082811747713047, "loss": 0.0656, "theoretical_loss": 3.363348338045031, "tokens_seen": 2839805952 }, { "epoch": 0.86, "learning_rate": 0.00014074787353554808, "loss": 0.0656, "theoretical_loss": 3.3633244055758516, "tokens_seen": 2840068096 }, { "epoch": 0.86, "learning_rate": 0.00014066762959396564, "loss": 0.0646, "theoretical_loss": 3.363300475934045, "tokens_seen": 2840330240 }, { "epoch": 0.86, "learning_rate": 0.00014058738565238325, "loss": 0.0646, "theoretical_loss": 3.363276549119015, "tokens_seen": 2840592384 }, { "epoch": 0.86, "learning_rate": 0.00014050714171080084, "loss": 0.0648, "theoretical_loss": 3.3632526251301678, "tokens_seen": 2840854528 }, { "epoch": 0.86, "learning_rate": 0.00014042689776921845, "loss": 0.0653, "theoretical_loss": 3.3632287039669087, "tokens_seen": 2841116672 }, { "epoch": 0.86, "learning_rate": 0.000140346653827636, "loss": 0.0663, "theoretical_loss": 3.363204785628643, "tokens_seen": 2841378816 }, { "epoch": 0.86, "learning_rate": 0.0001402664098860536, "loss": 0.0672, "theoretical_loss": 3.363180870114777, "tokens_seen": 2841640960 }, { "epoch": 0.86, "learning_rate": 0.0001401861659444712, "loss": 0.0656, "theoretical_loss": 3.3631569574247164, "tokens_seen": 2841903104 }, { "epoch": 0.86, "learning_rate": 0.0001401059220028888, "loss": 0.0661, "theoretical_loss": 3.3631330475578674, "tokens_seen": 2842165248 }, { "epoch": 0.86, "learning_rate": 0.00014002567806130638, "loss": 0.0652, "theoretical_loss": 3.363109140513636, "tokens_seen": 2842427392 }, { "epoch": 0.86, "learning_rate": 0.00013994543411972396, "loss": 0.0663, "theoretical_loss": 3.3630852362914294, "tokens_seen": 2842689536 }, { "epoch": 0.86, "learning_rate": 0.00013986519017814155, "loss": 0.0655, "theoretical_loss": 3.363061334890654, "tokens_seen": 2842951680 }, { "epoch": 0.86, "learning_rate": 0.00013978494623655913, "loss": 0.0647, "theoretical_loss": 3.363037436310716, "tokens_seen": 2843213824 }, { "epoch": 0.86, "learning_rate": 0.00013970470229497672, "loss": 0.0645, "theoretical_loss": 3.363013540551023, "tokens_seen": 2843475968 }, { "epoch": 0.86, "learning_rate": 0.00013962445835339433, "loss": 0.0669, "theoretical_loss": 3.362989647610983, "tokens_seen": 2843738112 }, { "epoch": 0.86, "learning_rate": 0.00013954421441181192, "loss": 0.0658, "theoretical_loss": 3.362965757490002, "tokens_seen": 2844000256 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.0007202853448688984, "objective/train/docs_used": 1033870, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3085803985595703, "objective/train/original_loss": 1.3085805177688599, "objective/train/theoretical_loss": 3.3629418701874885, "objective/train/tokens_used": 2864722400, "objective/train/value_avg": -0.005878448486328125, "objective/train/value_loss": 0.00014473906776402146, "objective/train/value_max": -2.7120113372802734e-05, "objective/train/value_min": -0.4228515625, "objective/train/value_reward_corr": 0.6706889494136361, "objective/train/value_std": 0.01087188720703125, "objective/train/weight_avg": 1.0007864236831665, "objective/train/weighted_lm_loss": 1.3093208074569702, "objective/train/weights_max": 1.4480531215667725, "objective/train/weights_min": 0.3685835599899292, "theoretical_loss": 3.3629418701874885, "tokens_seen": 2844262400 }, { "epoch": 0.86, "learning_rate": 0.00013946397047022948, "loss": 0.0664, "theoretical_loss": 3.3629418701874885, "tokens_seen": 2844262400 }, { "epoch": 0.86, "learning_rate": 0.0001393837265286471, "loss": 0.0656, "theoretical_loss": 3.3629179857028495, "tokens_seen": 2844524544 }, { "epoch": 0.86, "learning_rate": 0.00013930348258706467, "loss": 0.0649, "theoretical_loss": 3.362894104035494, "tokens_seen": 2844786688 }, { "epoch": 0.86, "learning_rate": 0.0001392232386454823, "loss": 0.0642, "theoretical_loss": 3.3628702251848295, "tokens_seen": 2845048832 }, { "epoch": 0.86, "learning_rate": 0.00013914299470389985, "loss": 0.0643, "theoretical_loss": 3.3628463491502645, "tokens_seen": 2845310976 }, { "epoch": 0.86, "learning_rate": 0.00013906275076231746, "loss": 0.0664, "theoretical_loss": 3.362822475931207, "tokens_seen": 2845573120 }, { "epoch": 0.86, "learning_rate": 0.00013898250682073504, "loss": 0.066, "theoretical_loss": 3.362798605527066, "tokens_seen": 2845835264 }, { "epoch": 0.86, "learning_rate": 0.0001389022628791526, "loss": 0.0654, "theoretical_loss": 3.362774737937251, "tokens_seen": 2846097408 }, { "epoch": 0.86, "learning_rate": 0.00013882201893757021, "loss": 0.0651, "theoretical_loss": 3.3627508731611697, "tokens_seen": 2846359552 }, { "epoch": 0.86, "learning_rate": 0.0001387417749959878, "loss": 0.0645, "theoretical_loss": 3.362727011198232, "tokens_seen": 2846621696 }, { "epoch": 0.86, "learning_rate": 0.0001386615310544054, "loss": 0.0633, "theoretical_loss": 3.362703152047848, "tokens_seen": 2846883840 }, { "epoch": 0.86, "learning_rate": 0.00013858128711282297, "loss": 0.0639, "theoretical_loss": 3.362679295709426, "tokens_seen": 2847145984 }, { "epoch": 0.86, "learning_rate": 0.00013850104317124058, "loss": 0.0638, "theoretical_loss": 3.3626554421823758, "tokens_seen": 2847408128 }, { "epoch": 0.86, "learning_rate": 0.00013842079922965817, "loss": 0.0667, "theoretical_loss": 3.3626315914661085, "tokens_seen": 2847670272 }, { "epoch": 0.86, "learning_rate": 0.00013834055528807575, "loss": 0.0634, "theoretical_loss": 3.362607743560033, "tokens_seen": 2847932416 }, { "epoch": 0.86, "learning_rate": 0.00013826031134649334, "loss": 0.0655, "theoretical_loss": 3.36258389846356, "tokens_seen": 2848194560 }, { "epoch": 0.86, "learning_rate": 0.00013818006740491093, "loss": 0.0642, "theoretical_loss": 3.3625600561761, "tokens_seen": 2848456704 }, { "epoch": 0.86, "learning_rate": 0.00013809982346332854, "loss": 0.0636, "theoretical_loss": 3.362536216697064, "tokens_seen": 2848718848 }, { "epoch": 0.86, "learning_rate": 0.00013801957952174612, "loss": 0.0652, "theoretical_loss": 3.362512380025862, "tokens_seen": 2848980992 }, { "epoch": 0.86, "learning_rate": 0.00013793933558016368, "loss": 0.0656, "theoretical_loss": 3.3624885461619054, "tokens_seen": 2849243136 }, { "epoch": 0.86, "learning_rate": 0.0001378590916385813, "loss": 0.0661, "theoretical_loss": 3.362464715104605, "tokens_seen": 2849505280 }, { "epoch": 0.86, "learning_rate": 0.00013777884769699888, "loss": 0.0649, "theoretical_loss": 3.362440886853373, "tokens_seen": 2849767424 }, { "epoch": 0.86, "learning_rate": 0.00013769860375541647, "loss": 0.0661, "theoretical_loss": 3.36241706140762, "tokens_seen": 2850029568 }, { "epoch": 0.86, "learning_rate": 0.00013761835981383405, "loss": 0.0663, "theoretical_loss": 3.3623932387667583, "tokens_seen": 2850291712 }, { "epoch": 0.86, "learning_rate": 0.00013753811587225166, "loss": 0.0652, "theoretical_loss": 3.362369418930199, "tokens_seen": 2850553856 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.0005318819312378764, "objective/train/docs_used": 1036421, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2573963403701782, "objective/train/original_loss": 1.2573959827423096, "objective/train/theoretical_loss": 3.3623456018973554, "objective/train/tokens_used": 2871276000, "objective/train/value_avg": -0.007602691650390625, "objective/train/value_loss": 0.0002422871912131086, "objective/train/value_max": -3.737211227416992e-05, "objective/train/value_min": -0.87158203125, "objective/train/value_reward_corr": 0.7889085844853957, "objective/train/value_std": 0.0200347900390625, "objective/train/weight_avg": 1.0006413459777832, "objective/train/weighted_lm_loss": 1.257759690284729, "objective/train/weights_max": 1.4836885929107666, "objective/train/weights_min": 0.36959269642829895, "theoretical_loss": 3.3623456018973554, "tokens_seen": 2850816000 }, { "epoch": 0.86, "learning_rate": 0.00013745787193066925, "loss": 0.0657, "theoretical_loss": 3.3623456018973554, "tokens_seen": 2850816000 }, { "epoch": 0.86, "learning_rate": 0.0001373776279890868, "loss": 0.0641, "theoretical_loss": 3.362321787667639, "tokens_seen": 2851078144 }, { "epoch": 0.86, "learning_rate": 0.00013729738404750442, "loss": 0.0637, "theoretical_loss": 3.3622979762404617, "tokens_seen": 2851340288 }, { "epoch": 0.86, "learning_rate": 0.000137217140105922, "loss": 0.0633, "theoretical_loss": 3.3622741676152366, "tokens_seen": 2851602432 }, { "epoch": 0.86, "learning_rate": 0.00013713689616433962, "loss": 0.0628, "theoretical_loss": 3.3622503617913764, "tokens_seen": 2851864576 }, { "epoch": 0.86, "learning_rate": 0.00013705665222275718, "loss": 0.0637, "theoretical_loss": 3.362226558768294, "tokens_seen": 2852126720 }, { "epoch": 0.86, "learning_rate": 0.00013697640828117476, "loss": 0.0644, "theoretical_loss": 3.3622027585454024, "tokens_seen": 2852388864 }, { "epoch": 0.86, "learning_rate": 0.00013689616433959238, "loss": 0.0665, "theoretical_loss": 3.3621789611221153, "tokens_seen": 2852651008 }, { "epoch": 0.86, "learning_rate": 0.00013681592039800993, "loss": 0.0656, "theoretical_loss": 3.362155166497846, "tokens_seen": 2852913152 }, { "epoch": 0.86, "learning_rate": 0.00013673567645642755, "loss": 0.0641, "theoretical_loss": 3.362131374672008, "tokens_seen": 2853175296 }, { "epoch": 0.86, "learning_rate": 0.00013665543251484513, "loss": 0.0644, "theoretical_loss": 3.362107585644015, "tokens_seen": 2853437440 }, { "epoch": 0.86, "learning_rate": 0.00013657518857326274, "loss": 0.0646, "theoretical_loss": 3.362083799413281, "tokens_seen": 2853699584 }, { "epoch": 0.86, "learning_rate": 0.0001364949446316803, "loss": 0.0646, "theoretical_loss": 3.3620600159792207, "tokens_seen": 2853961728 }, { "epoch": 0.86, "learning_rate": 0.0001364147006900979, "loss": 0.0662, "theoretical_loss": 3.3620362353412476, "tokens_seen": 2854223872 }, { "epoch": 0.87, "learning_rate": 0.0001363344567485155, "loss": 0.066, "theoretical_loss": 3.362012457498777, "tokens_seen": 2854486016 }, { "epoch": 0.87, "learning_rate": 0.00013625421280693309, "loss": 0.0662, "theoretical_loss": 3.361988682451223, "tokens_seen": 2854748160 }, { "epoch": 0.87, "learning_rate": 0.00013617396886535067, "loss": 0.0652, "theoretical_loss": 3.361964910198001, "tokens_seen": 2855010304 }, { "epoch": 0.87, "learning_rate": 0.00013609372492376826, "loss": 0.0651, "theoretical_loss": 3.3619411407385256, "tokens_seen": 2855272448 }, { "epoch": 0.87, "learning_rate": 0.00013601348098218584, "loss": 0.066, "theoretical_loss": 3.3619173740722124, "tokens_seen": 2855534592 }, { "epoch": 0.87, "learning_rate": 0.00013593323704060343, "loss": 0.0653, "theoretical_loss": 3.361893610198476, "tokens_seen": 2855796736 }, { "epoch": 0.87, "learning_rate": 0.00013585299309902101, "loss": 0.0684, "theoretical_loss": 3.3618698491167334, "tokens_seen": 2856058880 }, { "epoch": 0.87, "learning_rate": 0.00013577274915743863, "loss": 0.063, "theoretical_loss": 3.3618460908263987, "tokens_seen": 2856321024 }, { "epoch": 0.87, "learning_rate": 0.0001356925052158562, "loss": 0.0672, "theoretical_loss": 3.361822335326889, "tokens_seen": 2856583168 }, { "epoch": 0.87, "learning_rate": 0.0001356122612742738, "loss": 0.065, "theoretical_loss": 3.3617985826176207, "tokens_seen": 2856845312 }, { "epoch": 0.87, "learning_rate": 0.00013553201733269138, "loss": 0.0641, "theoretical_loss": 3.361774832698009, "tokens_seen": 2857107456 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.00029029804863967, "objective/train/docs_used": 1038225, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3740379810333252, "objective/train/original_loss": 1.374037742614746, "objective/train/theoretical_loss": 3.3617510855674704, "objective/train/tokens_used": 2877829600, "objective/train/value_avg": -0.007160186767578125, "objective/train/value_loss": 0.00025967363035306334, "objective/train/value_max": -4.470348358154297e-05, "objective/train/value_min": -0.67431640625, "objective/train/value_reward_corr": 0.6895487345317597, "objective/train/value_std": 0.013885498046875, "objective/train/weight_avg": 1.0004048347473145, "objective/train/weighted_lm_loss": 1.3736540079116821, "objective/train/weights_max": 1.2676682472229004, "objective/train/weights_min": 0.37812936305999756, "theoretical_loss": 3.3617510855674704, "tokens_seen": 2857369600 }, { "epoch": 0.87, "learning_rate": 0.00013545177339110897, "loss": 0.0651, "theoretical_loss": 3.3617510855674704, "tokens_seen": 2857369600 }, { "epoch": 0.87, "learning_rate": 0.00013537152944952658, "loss": 0.0651, "theoretical_loss": 3.3617273412254223, "tokens_seen": 2857631744 }, { "epoch": 0.87, "learning_rate": 0.00013529128550794414, "loss": 0.065, "theoretical_loss": 3.361703599671281, "tokens_seen": 2857893888 }, { "epoch": 0.87, "learning_rate": 0.00013521104156636175, "loss": 0.0679, "theoretical_loss": 3.361679860904464, "tokens_seen": 2858156032 }, { "epoch": 0.87, "learning_rate": 0.00013513079762477934, "loss": 0.0639, "theoretical_loss": 3.3616561249243877, "tokens_seen": 2858418176 }, { "epoch": 0.87, "learning_rate": 0.0001350505536831969, "loss": 0.0677, "theoretical_loss": 3.36163239173047, "tokens_seen": 2858680320 }, { "epoch": 0.87, "learning_rate": 0.0001349703097416145, "loss": 0.0672, "theoretical_loss": 3.361608661322128, "tokens_seen": 2858942464 }, { "epoch": 0.87, "learning_rate": 0.0001348900658000321, "loss": 0.067, "theoretical_loss": 3.3615849336987798, "tokens_seen": 2859204608 }, { "epoch": 0.87, "learning_rate": 0.0001348098218584497, "loss": 0.0647, "theoretical_loss": 3.361561208859843, "tokens_seen": 2859466752 }, { "epoch": 0.87, "learning_rate": 0.00013472957791686727, "loss": 0.0649, "theoretical_loss": 3.361537486804736, "tokens_seen": 2859728896 }, { "epoch": 0.87, "learning_rate": 0.00013464933397528488, "loss": 0.0657, "theoretical_loss": 3.3615137675328763, "tokens_seen": 2859991040 }, { "epoch": 0.87, "learning_rate": 0.00013456909003370246, "loss": 0.066, "theoretical_loss": 3.361490051043683, "tokens_seen": 2860253184 }, { "epoch": 0.87, "learning_rate": 0.00013448884609212005, "loss": 0.0652, "theoretical_loss": 3.361466337336574, "tokens_seen": 2860515328 }, { "epoch": 0.87, "learning_rate": 0.00013440860215053763, "loss": 0.066, "theoretical_loss": 3.3614426264109687, "tokens_seen": 2860777472 }, { "epoch": 0.87, "learning_rate": 0.00013432835820895522, "loss": 0.0663, "theoretical_loss": 3.3614189182662857, "tokens_seen": 2861039616 }, { "epoch": 0.87, "learning_rate": 0.00013424811426737283, "loss": 0.0649, "theoretical_loss": 3.361395212901944, "tokens_seen": 2861301760 }, { "epoch": 0.87, "learning_rate": 0.0001341678703257904, "loss": 0.0646, "theoretical_loss": 3.3613715103173627, "tokens_seen": 2861563904 }, { "epoch": 0.87, "learning_rate": 0.00013408762638420798, "loss": 0.0658, "theoretical_loss": 3.361347810511962, "tokens_seen": 2861826048 }, { "epoch": 0.87, "learning_rate": 0.0001340073824426256, "loss": 0.0629, "theoretical_loss": 3.36132411348516, "tokens_seen": 2862088192 }, { "epoch": 0.87, "learning_rate": 0.00013392713850104317, "loss": 0.0657, "theoretical_loss": 3.3613004192363785, "tokens_seen": 2862350336 }, { "epoch": 0.87, "learning_rate": 0.00013384689455946076, "loss": 0.0644, "theoretical_loss": 3.361276727765036, "tokens_seen": 2862612480 }, { "epoch": 0.87, "learning_rate": 0.00013376665061787835, "loss": 0.0656, "theoretical_loss": 3.361253039070553, "tokens_seen": 2862874624 }, { "epoch": 0.87, "learning_rate": 0.00013368640667629596, "loss": 0.0641, "theoretical_loss": 3.36122935315235, "tokens_seen": 2863136768 }, { "epoch": 0.87, "learning_rate": 0.00013360616273471354, "loss": 0.0665, "theoretical_loss": 3.361205670009847, "tokens_seen": 2863398912 }, { "epoch": 0.87, "learning_rate": 0.0001335259187931311, "loss": 0.0651, "theoretical_loss": 3.361181989642465, "tokens_seen": 2863661056 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.00021905562607571483, "objective/train/docs_used": 1040737, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.214687466621399, "objective/train/original_loss": 1.214687466621399, "objective/train/theoretical_loss": 3.3611583120496253, "objective/train/tokens_used": 2884383200, "objective/train/value_avg": -0.006732940673828125, "objective/train/value_loss": 0.0001407399686286226, "objective/train/value_max": -1.895427703857422e-05, "objective/train/value_min": -0.482177734375, "objective/train/value_reward_corr": 0.780282191310512, "objective/train/value_std": 0.01496124267578125, "objective/train/weight_avg": 1.00028657913208, "objective/train/weighted_lm_loss": 1.214595079421997, "objective/train/weights_max": 1.286849856376648, "objective/train/weights_min": 0.42503201961517334, "theoretical_loss": 3.3611583120496253, "tokens_seen": 2863923200 }, { "epoch": 0.87, "learning_rate": 0.00013344567485154871, "loss": 0.0604, "theoretical_loss": 3.3611583120496253, "tokens_seen": 2863923200 }, { "epoch": 0.87, "learning_rate": 0.0001333654309099663, "loss": 0.0661, "theoretical_loss": 3.361134637230748, "tokens_seen": 2864185344 }, { "epoch": 0.87, "learning_rate": 0.0001332851869683839, "loss": 0.0637, "theoretical_loss": 3.361110965185255, "tokens_seen": 2864447488 }, { "epoch": 0.87, "learning_rate": 0.00013320494302680147, "loss": 0.0662, "theoretical_loss": 3.361087295912567, "tokens_seen": 2864709632 }, { "epoch": 0.87, "learning_rate": 0.00013312469908521906, "loss": 0.0655, "theoretical_loss": 3.3610636294121057, "tokens_seen": 2864971776 }, { "epoch": 0.87, "learning_rate": 0.00013304445514363667, "loss": 0.0649, "theoretical_loss": 3.361039965683293, "tokens_seen": 2865233920 }, { "epoch": 0.87, "learning_rate": 0.00013296421120205423, "loss": 0.0657, "theoretical_loss": 3.361016304725551, "tokens_seen": 2865496064 }, { "epoch": 0.87, "learning_rate": 0.00013288396726047184, "loss": 0.0666, "theoretical_loss": 3.360992646538301, "tokens_seen": 2865758208 }, { "epoch": 0.87, "learning_rate": 0.00013280372331888943, "loss": 0.0656, "theoretical_loss": 3.3609689911209664, "tokens_seen": 2866020352 }, { "epoch": 0.87, "learning_rate": 0.00013272347937730704, "loss": 0.0632, "theoretical_loss": 3.360945338472968, "tokens_seen": 2866282496 }, { "epoch": 0.87, "learning_rate": 0.0001326432354357246, "loss": 0.0669, "theoretical_loss": 3.36092168859373, "tokens_seen": 2866544640 }, { "epoch": 0.87, "learning_rate": 0.00013256299149414218, "loss": 0.0642, "theoretical_loss": 3.3608980414826735, "tokens_seen": 2866806784 }, { "epoch": 0.87, "learning_rate": 0.0001324827475525598, "loss": 0.0644, "theoretical_loss": 3.3608743971392228, "tokens_seen": 2867068928 }, { "epoch": 0.87, "learning_rate": 0.00013240250361097738, "loss": 0.0653, "theoretical_loss": 3.3608507555628, "tokens_seen": 2867331072 }, { "epoch": 0.87, "learning_rate": 0.00013232225966939497, "loss": 0.0644, "theoretical_loss": 3.360827116752829, "tokens_seen": 2867593216 }, { "epoch": 0.87, "learning_rate": 0.00013224201572781255, "loss": 0.0644, "theoretical_loss": 3.360803480708733, "tokens_seen": 2867855360 }, { "epoch": 0.87, "learning_rate": 0.00013216177178623016, "loss": 0.064, "theoretical_loss": 3.360779847429936, "tokens_seen": 2868117504 }, { "epoch": 0.87, "learning_rate": 0.00013208152784464772, "loss": 0.0633, "theoretical_loss": 3.3607562169158607, "tokens_seen": 2868379648 }, { "epoch": 0.87, "learning_rate": 0.0001320012839030653, "loss": 0.0652, "theoretical_loss": 3.360732589165932, "tokens_seen": 2868641792 }, { "epoch": 0.87, "learning_rate": 0.00013192103996148292, "loss": 0.0676, "theoretical_loss": 3.3607089641795738, "tokens_seen": 2868903936 }, { "epoch": 0.87, "learning_rate": 0.0001318407960199005, "loss": 0.0646, "theoretical_loss": 3.36068534195621, "tokens_seen": 2869166080 }, { "epoch": 0.87, "learning_rate": 0.0001317605520783181, "loss": 0.0654, "theoretical_loss": 3.3606617224952657, "tokens_seen": 2869428224 }, { "epoch": 0.87, "learning_rate": 0.00013168030813673568, "loss": 0.0667, "theoretical_loss": 3.3606381057961654, "tokens_seen": 2869690368 }, { "epoch": 0.87, "learning_rate": 0.00013160006419515326, "loss": 0.0632, "theoretical_loss": 3.3606144918583336, "tokens_seen": 2869952512 }, { "epoch": 0.87, "learning_rate": 0.00013151982025357088, "loss": 0.0633, "theoretical_loss": 3.3605908806811953, "tokens_seen": 2870214656 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.0008621090091764927, "objective/train/docs_used": 1043211, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.284454345703125, "objective/train/original_loss": 1.284454345703125, "objective/train/theoretical_loss": 3.3605672722641757, "objective/train/tokens_used": 2890936800, "objective/train/value_avg": -0.0079803466796875, "objective/train/value_loss": 0.00011206701310584322, "objective/train/value_max": -3.045797348022461e-05, "objective/train/value_min": -0.25439453125, "objective/train/value_reward_corr": 0.7807823403804984, "objective/train/value_std": 0.01438140869140625, "objective/train/weight_avg": 1.0009177923202515, "objective/train/weighted_lm_loss": 1.2848042249679565, "objective/train/weights_max": 1.1853408813476562, "objective/train/weights_min": 0.8233441710472107, "theoretical_loss": 3.3605672722641757, "tokens_seen": 2870476800 }, { "epoch": 0.87, "learning_rate": 0.00013143957631198843, "loss": 0.0626, "theoretical_loss": 3.3605672722641757, "tokens_seen": 2870476800 }, { "epoch": 0.87, "learning_rate": 0.00013135933237040605, "loss": 0.0648, "theoretical_loss": 3.3605436666067003, "tokens_seen": 2870738944 }, { "epoch": 0.87, "learning_rate": 0.00013127908842882363, "loss": 0.0629, "theoretical_loss": 3.3605200637081944, "tokens_seen": 2871001088 }, { "epoch": 0.87, "learning_rate": 0.0001311988444872412, "loss": 0.0659, "theoretical_loss": 3.360496463568084, "tokens_seen": 2871263232 }, { "epoch": 0.87, "learning_rate": 0.0001311186005456588, "loss": 0.0676, "theoretical_loss": 3.360472866185794, "tokens_seen": 2871525376 }, { "epoch": 0.87, "learning_rate": 0.0001310383566040764, "loss": 0.0671, "theoretical_loss": 3.3604492715607517, "tokens_seen": 2871787520 }, { "epoch": 0.87, "learning_rate": 0.000130958112662494, "loss": 0.0681, "theoretical_loss": 3.3604256796923826, "tokens_seen": 2872049664 }, { "epoch": 0.87, "learning_rate": 0.00013087786872091156, "loss": 0.0651, "theoretical_loss": 3.360402090580113, "tokens_seen": 2872311808 }, { "epoch": 0.87, "learning_rate": 0.00013079762477932917, "loss": 0.0642, "theoretical_loss": 3.3603785042233696, "tokens_seen": 2872573952 }, { "epoch": 0.87, "learning_rate": 0.00013071738083774676, "loss": 0.0616, "theoretical_loss": 3.360354920621579, "tokens_seen": 2872836096 }, { "epoch": 0.87, "learning_rate": 0.00013063713689616434, "loss": 0.0667, "theoretical_loss": 3.3603313397741683, "tokens_seen": 2873098240 }, { "epoch": 0.87, "learning_rate": 0.00013055689295458193, "loss": 0.0681, "theoretical_loss": 3.3603077616805646, "tokens_seen": 2873360384 }, { "epoch": 0.87, "learning_rate": 0.00013047664901299951, "loss": 0.0654, "theoretical_loss": 3.3602841863401944, "tokens_seen": 2873622528 }, { "epoch": 0.87, "learning_rate": 0.00013039640507141713, "loss": 0.0618, "theoretical_loss": 3.360260613752486, "tokens_seen": 2873884672 }, { "epoch": 0.87, "learning_rate": 0.00013031616112983468, "loss": 0.0653, "theoretical_loss": 3.3602370439168663, "tokens_seen": 2874146816 }, { "epoch": 0.87, "learning_rate": 0.0001302359171882523, "loss": 0.066, "theoretical_loss": 3.3602134768327634, "tokens_seen": 2874408960 }, { "epoch": 0.87, "learning_rate": 0.00013015567324666988, "loss": 0.0646, "theoretical_loss": 3.360189912499605, "tokens_seen": 2874671104 }, { "epoch": 0.87, "learning_rate": 0.00013007542930508747, "loss": 0.0657, "theoretical_loss": 3.360166350916819, "tokens_seen": 2874933248 }, { "epoch": 0.87, "learning_rate": 0.00012999518536350505, "loss": 0.0672, "theoretical_loss": 3.360142792083834, "tokens_seen": 2875195392 }, { "epoch": 0.87, "learning_rate": 0.00012991494142192264, "loss": 0.0672, "theoretical_loss": 3.3601192360000782, "tokens_seen": 2875457536 }, { "epoch": 0.87, "learning_rate": 0.00012983469748034025, "loss": 0.0647, "theoretical_loss": 3.360095682664981, "tokens_seen": 2875719680 }, { "epoch": 0.87, "learning_rate": 0.00012975445353875784, "loss": 0.065, "theoretical_loss": 3.3600721320779696, "tokens_seen": 2875981824 }, { "epoch": 0.87, "learning_rate": 0.0001296742095971754, "loss": 0.0663, "theoretical_loss": 3.3600485842384735, "tokens_seen": 2876243968 }, { "epoch": 0.87, "learning_rate": 0.000129593965655593, "loss": 0.0663, "theoretical_loss": 3.3600250391459223, "tokens_seen": 2876506112 }, { "epoch": 0.87, "learning_rate": 0.0001295137217140106, "loss": 0.0644, "theoretical_loss": 3.360001496799746, "tokens_seen": 2876768256 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.0009444629540666938, "objective/train/docs_used": 1045577, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3567603826522827, "objective/train/original_loss": 1.3567602634429932, "objective/train/theoretical_loss": 3.359977957199372, "objective/train/tokens_used": 2897490400, "objective/train/value_avg": -0.007648468017578125, "objective/train/value_loss": 0.00022198569786269218, "objective/train/value_max": -1.817941665649414e-05, "objective/train/value_min": -0.666015625, "objective/train/value_reward_corr": 0.7108661837889094, "objective/train/value_std": 0.01611328125, "objective/train/weight_avg": 1.0010464191436768, "objective/train/weighted_lm_loss": 1.3576266765594482, "objective/train/weights_max": 1.7288668155670166, "objective/train/weights_min": 0.36866194009780884, "theoretical_loss": 3.359977957199372, "tokens_seen": 2877030400 }, { "epoch": 0.87, "learning_rate": 0.00012943347777242818, "loss": 0.0676, "theoretical_loss": 3.359977957199372, "tokens_seen": 2877030400 }, { "epoch": 0.87, "learning_rate": 0.00012935323383084577, "loss": 0.0666, "theoretical_loss": 3.3599544203442306, "tokens_seen": 2877292544 }, { "epoch": 0.87, "learning_rate": 0.00012927298988926338, "loss": 0.0641, "theoretical_loss": 3.359930886233753, "tokens_seen": 2877554688 }, { "epoch": 0.87, "learning_rate": 0.00012919274594768096, "loss": 0.065, "theoretical_loss": 3.3599073548673672, "tokens_seen": 2877816832 }, { "epoch": 0.87, "learning_rate": 0.00012911250200609852, "loss": 0.0668, "theoretical_loss": 3.3598838262445048, "tokens_seen": 2878078976 }, { "epoch": 0.87, "learning_rate": 0.00012903225806451613, "loss": 0.0679, "theoretical_loss": 3.359860300364595, "tokens_seen": 2878341120 }, { "epoch": 0.87, "learning_rate": 0.00012895201412293372, "loss": 0.0623, "theoretical_loss": 3.359836777227069, "tokens_seen": 2878603264 }, { "epoch": 0.87, "learning_rate": 0.00012887177018135133, "loss": 0.0687, "theoretical_loss": 3.359813256831357, "tokens_seen": 2878865408 }, { "epoch": 0.87, "learning_rate": 0.0001287915262397689, "loss": 0.0653, "theoretical_loss": 3.3597897391768905, "tokens_seen": 2879127552 }, { "epoch": 0.87, "learning_rate": 0.00012871128229818648, "loss": 0.0647, "theoretical_loss": 3.3597662242630992, "tokens_seen": 2879389696 }, { "epoch": 0.87, "learning_rate": 0.0001286310383566041, "loss": 0.0657, "theoretical_loss": 3.359742712089415, "tokens_seen": 2879651840 }, { "epoch": 0.87, "learning_rate": 0.00012855079441502167, "loss": 0.0659, "theoretical_loss": 3.3597192026552696, "tokens_seen": 2879913984 }, { "epoch": 0.87, "learning_rate": 0.00012847055047343926, "loss": 0.0661, "theoretical_loss": 3.3596956959600943, "tokens_seen": 2880176128 }, { "epoch": 0.87, "learning_rate": 0.00012839030653185685, "loss": 0.0676, "theoretical_loss": 3.3596721920033197, "tokens_seen": 2880438272 }, { "epoch": 0.87, "learning_rate": 0.00012831006259027446, "loss": 0.0637, "theoretical_loss": 3.359648690784379, "tokens_seen": 2880700416 }, { "epoch": 0.87, "learning_rate": 0.00012822981864869202, "loss": 0.0648, "theoretical_loss": 3.3596251923027034, "tokens_seen": 2880962560 }, { "epoch": 0.87, "learning_rate": 0.0001281495747071096, "loss": 0.0642, "theoretical_loss": 3.3596016965577253, "tokens_seen": 2881224704 }, { "epoch": 0.87, "learning_rate": 0.00012806933076552721, "loss": 0.0646, "theoretical_loss": 3.359578203548877, "tokens_seen": 2881486848 }, { "epoch": 0.87, "learning_rate": 0.0001279890868239448, "loss": 0.0656, "theoretical_loss": 3.3595547132755907, "tokens_seen": 2881748992 }, { "epoch": 0.87, "learning_rate": 0.00012790884288236239, "loss": 0.0672, "theoretical_loss": 3.3595312257372996, "tokens_seen": 2882011136 }, { "epoch": 0.87, "learning_rate": 0.00012782859894077997, "loss": 0.0681, "theoretical_loss": 3.359507740933436, "tokens_seen": 2882273280 }, { "epoch": 0.87, "learning_rate": 0.00012774835499919756, "loss": 0.0642, "theoretical_loss": 3.359484258863434, "tokens_seen": 2882535424 }, { "epoch": 0.87, "learning_rate": 0.00012766811105761517, "loss": 0.0649, "theoretical_loss": 3.3594607795267253, "tokens_seen": 2882797568 }, { "epoch": 0.87, "learning_rate": 0.00012758786711603273, "loss": 0.0658, "theoretical_loss": 3.359437302922744, "tokens_seen": 2883059712 }, { "epoch": 0.87, "learning_rate": 0.00012750762317445034, "loss": 0.0637, "theoretical_loss": 3.3594138290509235, "tokens_seen": 2883321856 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.0009214555029757321, "objective/train/docs_used": 1048090, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.292439579963684, "objective/train/original_loss": 1.2924394607543945, "objective/train/theoretical_loss": 3.3593903579106974, "objective/train/tokens_used": 2904044000, "objective/train/value_avg": -0.00852203369140625, "objective/train/value_loss": 0.00013700471026822925, "objective/train/value_max": -1.6033649444580078e-05, "objective/train/value_min": -0.274658203125, "objective/train/value_reward_corr": 0.8968270892350809, "objective/train/value_std": 0.0258941650390625, "objective/train/weight_avg": 1.0009853839874268, "objective/train/weighted_lm_loss": 1.2942968606948853, "objective/train/weights_max": 1.2442163228988647, "objective/train/weights_min": 0.3884040415287018, "theoretical_loss": 3.3593903579106974, "tokens_seen": 2883584000 }, { "epoch": 0.87, "learning_rate": 0.00012742737923286793, "loss": 0.0652, "theoretical_loss": 3.3593903579106974, "tokens_seen": 2883584000 }, { "epoch": 0.87, "learning_rate": 0.0001273471352912855, "loss": 0.0641, "theoretical_loss": 3.3593668895014996, "tokens_seen": 2883846144 }, { "epoch": 0.87, "learning_rate": 0.0001272668913497031, "loss": 0.0645, "theoretical_loss": 3.3593434238227644, "tokens_seen": 2884108288 }, { "epoch": 0.87, "learning_rate": 0.00012718664740812068, "loss": 0.0658, "theoretical_loss": 3.3593199608739255, "tokens_seen": 2884370432 }, { "epoch": 0.87, "learning_rate": 0.0001271064034665383, "loss": 0.0648, "theoretical_loss": 3.359296500654418, "tokens_seen": 2884632576 }, { "epoch": 0.87, "learning_rate": 0.00012702615952495585, "loss": 0.0639, "theoretical_loss": 3.3592730431636753, "tokens_seen": 2884894720 }, { "epoch": 0.87, "learning_rate": 0.00012694591558337347, "loss": 0.0653, "theoretical_loss": 3.359249588401133, "tokens_seen": 2885156864 }, { "epoch": 0.87, "learning_rate": 0.00012686567164179105, "loss": 0.0663, "theoretical_loss": 3.3592261363662255, "tokens_seen": 2885419008 }, { "epoch": 0.87, "learning_rate": 0.00012678542770020864, "loss": 0.0645, "theoretical_loss": 3.359202687058388, "tokens_seen": 2885681152 }, { "epoch": 0.87, "learning_rate": 0.00012670518375862622, "loss": 0.0683, "theoretical_loss": 3.3591792404770557, "tokens_seen": 2885943296 }, { "epoch": 0.87, "learning_rate": 0.0001266249398170438, "loss": 0.065, "theoretical_loss": 3.359155796621664, "tokens_seen": 2886205440 }, { "epoch": 0.87, "learning_rate": 0.00012654469587546142, "loss": 0.0657, "theoretical_loss": 3.3591323554916483, "tokens_seen": 2886467584 }, { "epoch": 0.87, "learning_rate": 0.00012646445193387898, "loss": 0.0648, "theoretical_loss": 3.3591089170864445, "tokens_seen": 2886729728 }, { "epoch": 0.87, "learning_rate": 0.0001263842079922966, "loss": 0.0634, "theoretical_loss": 3.3590854814054882, "tokens_seen": 2886991872 }, { "epoch": 0.87, "learning_rate": 0.00012630396405071418, "loss": 0.0643, "theoretical_loss": 3.359062048448216, "tokens_seen": 2887254016 }, { "epoch": 0.88, "learning_rate": 0.00012622372010913176, "loss": 0.0656, "theoretical_loss": 3.3590386182140635, "tokens_seen": 2887516160 }, { "epoch": 0.88, "learning_rate": 0.00012614347616754935, "loss": 0.0658, "theoretical_loss": 3.3590151907024675, "tokens_seen": 2887778304 }, { "epoch": 0.88, "learning_rate": 0.00012606323222596693, "loss": 0.0685, "theoretical_loss": 3.3589917659128634, "tokens_seen": 2888040448 }, { "epoch": 0.88, "learning_rate": 0.00012598298828438455, "loss": 0.0639, "theoretical_loss": 3.3589683438446896, "tokens_seen": 2888302592 }, { "epoch": 0.88, "learning_rate": 0.00012590274434280213, "loss": 0.0647, "theoretical_loss": 3.358944924497382, "tokens_seen": 2888564736 }, { "epoch": 0.88, "learning_rate": 0.0001258225004012197, "loss": 0.0656, "theoretical_loss": 3.3589215078703782, "tokens_seen": 2888826880 }, { "epoch": 0.88, "learning_rate": 0.0001257422564596373, "loss": 0.0638, "theoretical_loss": 3.3588980939631146, "tokens_seen": 2889089024 }, { "epoch": 0.88, "learning_rate": 0.0001256620125180549, "loss": 0.0657, "theoretical_loss": 3.358874682775029, "tokens_seen": 2889351168 }, { "epoch": 0.88, "learning_rate": 0.00012558176857647247, "loss": 0.0654, "theoretical_loss": 3.358851274305559, "tokens_seen": 2889613312 }, { "epoch": 0.88, "learning_rate": 0.00012550152463489006, "loss": 0.0653, "theoretical_loss": 3.3588278685541426, "tokens_seen": 2889875456 }, { "epoch": 0.88, "objective/train/advantage_avg": 9.303795377491042e-05, "objective/train/docs_used": 1050561, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3007079362869263, "objective/train/original_loss": 1.3007079362869263, "objective/train/theoretical_loss": 3.358804465520217, "objective/train/tokens_used": 2910597600, "objective/train/value_avg": -0.0085906982421875, "objective/train/value_loss": 0.00047798847663216293, "objective/train/value_max": -1.1205673217773438e-05, "objective/train/value_min": -0.9912109375, "objective/train/value_reward_corr": 0.7482428192656447, "objective/train/value_std": 0.0229644775390625, "objective/train/weight_avg": 1.0002946853637695, "objective/train/weighted_lm_loss": 1.2997008562088013, "objective/train/weights_max": 2.048863172531128, "objective/train/weights_min": 0.22682039439678192, "theoretical_loss": 3.358804465520217, "tokens_seen": 2890137600 }, { "epoch": 0.88, "learning_rate": 0.00012542128069330767, "loss": 0.0652, "theoretical_loss": 3.358804465520217, "tokens_seen": 2890137600 }, { "epoch": 0.88, "learning_rate": 0.00012534103675172526, "loss": 0.064, "theoretical_loss": 3.358781065203221, "tokens_seen": 2890399744 }, { "epoch": 0.88, "learning_rate": 0.00012526079281014282, "loss": 0.0664, "theoretical_loss": 3.358757667602592, "tokens_seen": 2890661888 }, { "epoch": 0.88, "learning_rate": 0.00012518054886856043, "loss": 0.0651, "theoretical_loss": 3.3587342727177694, "tokens_seen": 2890924032 }, { "epoch": 0.88, "learning_rate": 0.00012510030492697801, "loss": 0.0647, "theoretical_loss": 3.3587108805481907, "tokens_seen": 2891186176 }, { "epoch": 0.88, "learning_rate": 0.00012502006098539563, "loss": 0.0635, "theoretical_loss": 3.358687491093295, "tokens_seen": 2891448320 }, { "epoch": 0.88, "learning_rate": 0.0001249398170438132, "loss": 0.0646, "theoretical_loss": 3.3586641043525214, "tokens_seen": 2891710464 }, { "epoch": 0.88, "learning_rate": 0.00012485957310223077, "loss": 0.0665, "theoretical_loss": 3.358640720325309, "tokens_seen": 2891972608 }, { "epoch": 0.88, "learning_rate": 0.00012477932916064838, "loss": 0.0657, "theoretical_loss": 3.3586173390110967, "tokens_seen": 2892234752 }, { "epoch": 0.88, "learning_rate": 0.00012469908521906597, "loss": 0.063, "theoretical_loss": 3.358593960409324, "tokens_seen": 2892496896 }, { "epoch": 0.88, "learning_rate": 0.00012461884127748355, "loss": 0.065, "theoretical_loss": 3.3585705845194305, "tokens_seen": 2892759040 }, { "epoch": 0.88, "learning_rate": 0.00012453859733590114, "loss": 0.0657, "theoretical_loss": 3.3585472113408557, "tokens_seen": 2893021184 }, { "epoch": 0.88, "learning_rate": 0.00012445835339431873, "loss": 0.0643, "theoretical_loss": 3.35852384087304, "tokens_seen": 2893283328 }, { "epoch": 0.88, "learning_rate": 0.0001243781094527363, "loss": 0.0644, "theoretical_loss": 3.3585004731154227, "tokens_seen": 2893545472 }, { "epoch": 0.88, "learning_rate": 0.0001242978655111539, "loss": 0.0647, "theoretical_loss": 3.358477108067445, "tokens_seen": 2893807616 }, { "epoch": 0.88, "learning_rate": 0.0001242176215695715, "loss": 0.0669, "theoretical_loss": 3.3584537457285464, "tokens_seen": 2894069760 }, { "epoch": 0.88, "learning_rate": 0.0001241373776279891, "loss": 0.0654, "theoretical_loss": 3.3584303860981675, "tokens_seen": 2894331904 }, { "epoch": 0.88, "learning_rate": 0.00012405713368640668, "loss": 0.0682, "theoretical_loss": 3.3584070291757495, "tokens_seen": 2894594048 }, { "epoch": 0.88, "learning_rate": 0.00012397688974482427, "loss": 0.0641, "theoretical_loss": 3.358383674960733, "tokens_seen": 2894856192 }, { "epoch": 0.88, "learning_rate": 0.00012389664580324185, "loss": 0.0658, "theoretical_loss": 3.358360323452559, "tokens_seen": 2895118336 }, { "epoch": 0.88, "learning_rate": 0.00012381640186165944, "loss": 0.0641, "theoretical_loss": 3.358336974650669, "tokens_seen": 2895380480 }, { "epoch": 0.88, "learning_rate": 0.00012373615792007702, "loss": 0.0618, "theoretical_loss": 3.3583136285545043, "tokens_seen": 2895642624 }, { "epoch": 0.88, "learning_rate": 0.00012365591397849463, "loss": 0.0646, "theoretical_loss": 3.3582902851635064, "tokens_seen": 2895904768 }, { "epoch": 0.88, "learning_rate": 0.00012357567003691222, "loss": 0.0651, "theoretical_loss": 3.3582669444771165, "tokens_seen": 2896166912 }, { "epoch": 0.88, "learning_rate": 0.0001234954260953298, "loss": 0.0636, "theoretical_loss": 3.3582436064947774, "tokens_seen": 2896429056 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.000869004346895963, "objective/train/docs_used": 1053062, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2724812030792236, "objective/train/original_loss": 1.2724812030792236, "objective/train/theoretical_loss": 3.358220271215931, "objective/train/tokens_used": 2917151200, "objective/train/value_avg": -0.0066986083984375, "objective/train/value_loss": 0.000132927336380817, "objective/train/value_max": -1.7523765563964844e-05, "objective/train/value_min": -0.335693359375, "objective/train/value_reward_corr": 0.7094154754724421, "objective/train/value_std": 0.01219940185546875, "objective/train/weight_avg": 1.0009315013885498, "objective/train/weighted_lm_loss": 1.2730143070220947, "objective/train/weights_max": 1.165982961654663, "objective/train/weights_min": 0.3854755163192749, "theoretical_loss": 3.358220271215931, "tokens_seen": 2896691200 }, { "epoch": 0.88, "learning_rate": 0.0001234151821537474, "loss": 0.0642, "theoretical_loss": 3.358220271215931, "tokens_seen": 2896691200 }, { "epoch": 0.88, "learning_rate": 0.00012333493821216498, "loss": 0.0645, "theoretical_loss": 3.358196938640018, "tokens_seen": 2896953344 }, { "epoch": 0.88, "learning_rate": 0.00012325469427058256, "loss": 0.068, "theoretical_loss": 3.358173608766483, "tokens_seen": 2897215488 }, { "epoch": 0.88, "learning_rate": 0.00012317445032900017, "loss": 0.0683, "theoretical_loss": 3.358150281594767, "tokens_seen": 2897477632 }, { "epoch": 0.88, "learning_rate": 0.00012309420638741776, "loss": 0.0646, "theoretical_loss": 3.358126957124313, "tokens_seen": 2897739776 }, { "epoch": 0.88, "learning_rate": 0.00012301396244583535, "loss": 0.065, "theoretical_loss": 3.3581036353545644, "tokens_seen": 2898001920 }, { "epoch": 0.88, "learning_rate": 0.00012293371850425293, "loss": 0.0643, "theoretical_loss": 3.3580803162849637, "tokens_seen": 2898264064 }, { "epoch": 0.88, "learning_rate": 0.00012285347456267052, "loss": 0.0639, "theoretical_loss": 3.3580569999149548, "tokens_seen": 2898526208 }, { "epoch": 0.88, "learning_rate": 0.0001227732306210881, "loss": 0.0632, "theoretical_loss": 3.35803368624398, "tokens_seen": 2898788352 }, { "epoch": 0.88, "learning_rate": 0.0001226929866795057, "loss": 0.0625, "theoretical_loss": 3.358010375271484, "tokens_seen": 2899050496 }, { "epoch": 0.88, "learning_rate": 0.0001226127427379233, "loss": 0.066, "theoretical_loss": 3.357987066996909, "tokens_seen": 2899312640 }, { "epoch": 0.88, "learning_rate": 0.00012253249879634089, "loss": 0.0652, "theoretical_loss": 3.3579637614197004, "tokens_seen": 2899574784 }, { "epoch": 0.88, "learning_rate": 0.00012245225485475847, "loss": 0.0649, "theoretical_loss": 3.3579404585393013, "tokens_seen": 2899836928 }, { "epoch": 0.88, "learning_rate": 0.00012237201091317606, "loss": 0.0672, "theoretical_loss": 3.357917158355156, "tokens_seen": 2900099072 }, { "epoch": 0.88, "learning_rate": 0.00012229176697159364, "loss": 0.0667, "theoretical_loss": 3.3578938608667097, "tokens_seen": 2900361216 }, { "epoch": 0.88, "learning_rate": 0.00012221152303001123, "loss": 0.0624, "theoretical_loss": 3.3578705660734056, "tokens_seen": 2900623360 }, { "epoch": 0.88, "learning_rate": 0.00012213127908842884, "loss": 0.0633, "theoretical_loss": 3.3578472739746896, "tokens_seen": 2900885504 }, { "epoch": 0.88, "learning_rate": 0.00012205103514684641, "loss": 0.064, "theoretical_loss": 3.3578239845700053, "tokens_seen": 2901147648 }, { "epoch": 0.88, "learning_rate": 0.00012197079120526401, "loss": 0.0642, "theoretical_loss": 3.357800697858799, "tokens_seen": 2901409792 }, { "epoch": 0.88, "learning_rate": 0.0001218905472636816, "loss": 0.0643, "theoretical_loss": 3.3577774138405148, "tokens_seen": 2901671936 }, { "epoch": 0.88, "learning_rate": 0.00012181030332209918, "loss": 0.0645, "theoretical_loss": 3.3577541325145988, "tokens_seen": 2901934080 }, { "epoch": 0.88, "learning_rate": 0.00012173005938051678, "loss": 0.0649, "theoretical_loss": 3.357730853880496, "tokens_seen": 2902196224 }, { "epoch": 0.88, "learning_rate": 0.00012164981543893435, "loss": 0.0628, "theoretical_loss": 3.3577075779376524, "tokens_seen": 2902458368 }, { "epoch": 0.88, "learning_rate": 0.00012156957149735195, "loss": 0.0656, "theoretical_loss": 3.3576843046855136, "tokens_seen": 2902720512 }, { "epoch": 0.88, "learning_rate": 0.00012148932755576954, "loss": 0.0638, "theoretical_loss": 3.3576610341235256, "tokens_seen": 2902982656 }, { "epoch": 0.88, "objective/train/advantage_avg": 9.657556074671447e-05, "objective/train/docs_used": 1055596, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.330022931098938, "objective/train/original_loss": 1.3300228118896484, "objective/train/theoretical_loss": 3.357637766251135, "objective/train/tokens_used": 2923704800, "objective/train/value_avg": -0.00839996337890625, "objective/train/value_loss": 0.00018951736274175346, "objective/train/value_max": -3.3736228942871094e-05, "objective/train/value_min": -0.25439453125, "objective/train/value_reward_corr": 0.7220051445696589, "objective/train/value_std": 0.01517486572265625, "objective/train/weight_avg": 1.000184178352356, "objective/train/weighted_lm_loss": 1.3297585248947144, "objective/train/weights_max": 1.1480686664581299, "objective/train/weights_min": 0.36959975957870483, "theoretical_loss": 3.357637766251135, "tokens_seen": 2903244800 }, { "epoch": 0.88, "learning_rate": 0.00012140908361418714, "loss": 0.0647, "theoretical_loss": 3.357637766251135, "tokens_seen": 2903244800 }, { "epoch": 0.88, "learning_rate": 0.00012132883967260472, "loss": 0.0663, "theoretical_loss": 3.3576145010677876, "tokens_seen": 2903506944 }, { "epoch": 0.88, "learning_rate": 0.00012124859573102232, "loss": 0.0622, "theoretical_loss": 3.35759123857293, "tokens_seen": 2903769088 }, { "epoch": 0.88, "learning_rate": 0.0001211683517894399, "loss": 0.0656, "theoretical_loss": 3.357567978766009, "tokens_seen": 2904031232 }, { "epoch": 0.88, "learning_rate": 0.00012108810784785749, "loss": 0.0656, "theoretical_loss": 3.3575447216464718, "tokens_seen": 2904293376 }, { "epoch": 0.88, "learning_rate": 0.00012100786390627508, "loss": 0.0632, "theoretical_loss": 3.3575214672137648, "tokens_seen": 2904555520 }, { "epoch": 0.88, "learning_rate": 0.00012092761996469266, "loss": 0.0653, "theoretical_loss": 3.357498215467335, "tokens_seen": 2904817664 }, { "epoch": 0.88, "learning_rate": 0.00012084737602311026, "loss": 0.06, "theoretical_loss": 3.35747496640663, "tokens_seen": 2905079808 }, { "epoch": 0.88, "learning_rate": 0.00012076713208152785, "loss": 0.0666, "theoretical_loss": 3.3574517200310976, "tokens_seen": 2905341952 }, { "epoch": 0.88, "learning_rate": 0.00012068688813994543, "loss": 0.0611, "theoretical_loss": 3.3574284763401847, "tokens_seen": 2905604096 }, { "epoch": 0.88, "learning_rate": 0.00012060664419836302, "loss": 0.0661, "theoretical_loss": 3.35740523533334, "tokens_seen": 2905866240 }, { "epoch": 0.88, "learning_rate": 0.00012052640025678062, "loss": 0.0648, "theoretical_loss": 3.3573819970100107, "tokens_seen": 2906128384 }, { "epoch": 0.88, "learning_rate": 0.0001204461563151982, "loss": 0.0651, "theoretical_loss": 3.357358761369645, "tokens_seen": 2906390528 }, { "epoch": 0.88, "learning_rate": 0.0001203659123736158, "loss": 0.0635, "theoretical_loss": 3.357335528411692, "tokens_seen": 2906652672 }, { "epoch": 0.88, "learning_rate": 0.00012028566843203339, "loss": 0.0677, "theoretical_loss": 3.357312298135599, "tokens_seen": 2906914816 }, { "epoch": 0.88, "learning_rate": 0.00012020542449045097, "loss": 0.0631, "theoretical_loss": 3.3572890705408156, "tokens_seen": 2907176960 }, { "epoch": 0.88, "learning_rate": 0.00012012518054886856, "loss": 0.065, "theoretical_loss": 3.35726584562679, "tokens_seen": 2907439104 }, { "epoch": 0.88, "learning_rate": 0.00012004493660728616, "loss": 0.0653, "theoretical_loss": 3.357242623392971, "tokens_seen": 2907701248 }, { "epoch": 0.88, "learning_rate": 0.00011996469266570374, "loss": 0.0636, "theoretical_loss": 3.3572194038388083, "tokens_seen": 2907963392 }, { "epoch": 0.88, "learning_rate": 0.00011988444872412133, "loss": 0.0647, "theoretical_loss": 3.3571961869637508, "tokens_seen": 2908225536 }, { "epoch": 0.88, "learning_rate": 0.00011980420478253893, "loss": 0.0633, "theoretical_loss": 3.357172972767248, "tokens_seen": 2908487680 }, { "epoch": 0.88, "learning_rate": 0.0001197239608409565, "loss": 0.0637, "theoretical_loss": 3.357149761248749, "tokens_seen": 2908749824 }, { "epoch": 0.88, "learning_rate": 0.0001196437168993741, "loss": 0.065, "theoretical_loss": 3.357126552407705, "tokens_seen": 2909011968 }, { "epoch": 0.88, "learning_rate": 0.00011956347295779169, "loss": 0.066, "theoretical_loss": 3.3571033462435644, "tokens_seen": 2909274112 }, { "epoch": 0.88, "learning_rate": 0.00011948322901620928, "loss": 0.0667, "theoretical_loss": 3.3570801427557777, "tokens_seen": 2909536256 }, { "epoch": 0.88, "objective/train/advantage_avg": -0.0011890976456925273, "objective/train/docs_used": 1057798, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2657890319824219, "objective/train/original_loss": 1.2657889127731323, "objective/train/theoretical_loss": 3.357056941943796, "objective/train/tokens_used": 2930258400, "objective/train/value_avg": -0.009063720703125, "objective/train/value_loss": 0.0001873580040410161, "objective/train/value_max": -2.2649765014648438e-05, "objective/train/value_min": -0.325439453125, "objective/train/value_reward_corr": 0.9082124581128177, "objective/train/value_std": 0.023529052734375, "objective/train/weight_avg": 0.9988996982574463, "objective/train/weighted_lm_loss": 1.2639284133911133, "objective/train/weights_max": 1.3067957162857056, "objective/train/weights_min": 0.37186679244041443, "theoretical_loss": 3.357056941943796, "tokens_seen": 2909798400 }, { "epoch": 0.88, "learning_rate": 0.00011940298507462687, "loss": 0.0629, "theoretical_loss": 3.357056941943796, "tokens_seen": 2909798400 }, { "epoch": 0.88, "learning_rate": 0.00011932274113304447, "loss": 0.0643, "theoretical_loss": 3.3570337438070683, "tokens_seen": 2910060544 }, { "epoch": 0.88, "learning_rate": 0.00011924249719146204, "loss": 0.0667, "theoretical_loss": 3.357010548345046, "tokens_seen": 2910322688 }, { "epoch": 0.88, "learning_rate": 0.00011916225324987964, "loss": 0.0668, "theoretical_loss": 3.3569873555571803, "tokens_seen": 2910584832 }, { "epoch": 0.88, "learning_rate": 0.00011908200930829723, "loss": 0.066, "theoretical_loss": 3.3569641654429208, "tokens_seen": 2910846976 }, { "epoch": 0.88, "learning_rate": 0.00011900176536671481, "loss": 0.0643, "theoretical_loss": 3.3569409780017194, "tokens_seen": 2911109120 }, { "epoch": 0.88, "learning_rate": 0.00011892152142513241, "loss": 0.0648, "theoretical_loss": 3.356917793233027, "tokens_seen": 2911371264 }, { "epoch": 0.88, "learning_rate": 0.00011884127748355, "loss": 0.0656, "theoretical_loss": 3.356894611136296, "tokens_seen": 2911633408 }, { "epoch": 0.88, "learning_rate": 0.00011876103354196758, "loss": 0.0648, "theoretical_loss": 3.3568714317109762, "tokens_seen": 2911895552 }, { "epoch": 0.88, "learning_rate": 0.00011868078960038517, "loss": 0.0627, "theoretical_loss": 3.3568482549565206, "tokens_seen": 2912157696 }, { "epoch": 0.88, "learning_rate": 0.00011860054565880277, "loss": 0.0639, "theoretical_loss": 3.3568250808723805, "tokens_seen": 2912419840 }, { "epoch": 0.88, "learning_rate": 0.00011852030171722035, "loss": 0.0671, "theoretical_loss": 3.356801909458008, "tokens_seen": 2912681984 }, { "epoch": 0.88, "learning_rate": 0.00011844005777563795, "loss": 0.0643, "theoretical_loss": 3.3567787407128558, "tokens_seen": 2912944128 }, { "epoch": 0.88, "learning_rate": 0.00011835981383405554, "loss": 0.065, "theoretical_loss": 3.3567555746363755, "tokens_seen": 2913206272 }, { "epoch": 0.88, "learning_rate": 0.00011827956989247312, "loss": 0.0666, "theoretical_loss": 3.35673241122802, "tokens_seen": 2913468416 }, { "epoch": 0.88, "learning_rate": 0.0001181993259508907, "loss": 0.0664, "theoretical_loss": 3.356709250487242, "tokens_seen": 2913730560 }, { "epoch": 0.88, "learning_rate": 0.00011811908200930829, "loss": 0.0636, "theoretical_loss": 3.356686092413494, "tokens_seen": 2913992704 }, { "epoch": 0.88, "learning_rate": 0.00011803883806772589, "loss": 0.064, "theoretical_loss": 3.3566629370062295, "tokens_seen": 2914254848 }, { "epoch": 0.88, "learning_rate": 0.00011795859412614348, "loss": 0.0669, "theoretical_loss": 3.3566397842649014, "tokens_seen": 2914516992 }, { "epoch": 0.88, "learning_rate": 0.00011787835018456108, "loss": 0.0666, "theoretical_loss": 3.356616634188963, "tokens_seen": 2914779136 }, { "epoch": 0.88, "learning_rate": 0.00011779810624297865, "loss": 0.0652, "theoretical_loss": 3.356593486777868, "tokens_seen": 2915041280 }, { "epoch": 0.88, "learning_rate": 0.00011771786230139625, "loss": 0.0641, "theoretical_loss": 3.3565703420310697, "tokens_seen": 2915303424 }, { "epoch": 0.88, "learning_rate": 0.00011763761835981383, "loss": 0.066, "theoretical_loss": 3.356547199948022, "tokens_seen": 2915565568 }, { "epoch": 0.88, "learning_rate": 0.00011755737441823143, "loss": 0.0662, "theoretical_loss": 3.356524060528179, "tokens_seen": 2915827712 }, { "epoch": 0.88, "learning_rate": 0.00011747713047664902, "loss": 0.0642, "theoretical_loss": 3.356500923770995, "tokens_seen": 2916089856 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.00014633123646490276, "objective/train/docs_used": 1060149, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2154415845870972, "objective/train/original_loss": 1.2154414653778076, "objective/train/theoretical_loss": 3.3564777896759237, "objective/train/tokens_used": 2936812000, "objective/train/value_avg": -0.00882720947265625, "objective/train/value_loss": 0.00020796981698367745, "objective/train/value_max": -1.6570091247558594e-05, "objective/train/value_min": -0.29736328125, "objective/train/value_reward_corr": 0.8001300778151015, "objective/train/value_std": 0.01922607421875, "objective/train/weight_avg": 1.0002456903457642, "objective/train/weighted_lm_loss": 1.215817928314209, "objective/train/weights_max": 1.2599588632583618, "objective/train/weights_min": 0.3954486548900604, "theoretical_loss": 3.3564777896759237, "tokens_seen": 2916352000 }, { "epoch": 0.88, "learning_rate": 0.00011739688653506662, "loss": 0.066, "theoretical_loss": 3.3564777896759237, "tokens_seen": 2916352000 }, { "epoch": 0.88, "learning_rate": 0.00011731664259348419, "loss": 0.0654, "theoretical_loss": 3.35645465824242, "tokens_seen": 2916614144 }, { "epoch": 0.88, "learning_rate": 0.00011723639865190179, "loss": 0.0634, "theoretical_loss": 3.3564315294699383, "tokens_seen": 2916876288 }, { "epoch": 0.88, "learning_rate": 0.00011715615471031937, "loss": 0.0652, "theoretical_loss": 3.3564084033579333, "tokens_seen": 2917138432 }, { "epoch": 0.88, "learning_rate": 0.00011707591076873696, "loss": 0.0655, "theoretical_loss": 3.3563852799058607, "tokens_seen": 2917400576 }, { "epoch": 0.88, "learning_rate": 0.00011699566682715456, "loss": 0.0633, "theoretical_loss": 3.3563621591131745, "tokens_seen": 2917662720 }, { "epoch": 0.88, "learning_rate": 0.00011691542288557214, "loss": 0.0644, "theoretical_loss": 3.3563390409793303, "tokens_seen": 2917924864 }, { "epoch": 0.88, "learning_rate": 0.00011683517894398973, "loss": 0.0663, "theoretical_loss": 3.356315925503784, "tokens_seen": 2918187008 }, { "epoch": 0.88, "learning_rate": 0.00011675493500240731, "loss": 0.0656, "theoretical_loss": 3.356292812685991, "tokens_seen": 2918449152 }, { "epoch": 0.88, "learning_rate": 0.00011667469106082491, "loss": 0.0634, "theoretical_loss": 3.3562697025254065, "tokens_seen": 2918711296 }, { "epoch": 0.88, "learning_rate": 0.0001165944471192425, "loss": 0.0644, "theoretical_loss": 3.356246595021487, "tokens_seen": 2918973440 }, { "epoch": 0.88, "learning_rate": 0.0001165142031776601, "loss": 0.0681, "theoretical_loss": 3.356223490173688, "tokens_seen": 2919235584 }, { "epoch": 0.88, "learning_rate": 0.00011643395923607768, "loss": 0.0647, "theoretical_loss": 3.356200387981466, "tokens_seen": 2919497728 }, { "epoch": 0.88, "learning_rate": 0.00011635371529449527, "loss": 0.0668, "theoretical_loss": 3.3561772884442775, "tokens_seen": 2919759872 }, { "epoch": 0.88, "learning_rate": 0.00011627347135291285, "loss": 0.0636, "theoretical_loss": 3.356154191561579, "tokens_seen": 2920022016 }, { "epoch": 0.88, "learning_rate": 0.00011619322741133044, "loss": 0.066, "theoretical_loss": 3.356131097332827, "tokens_seen": 2920284160 }, { "epoch": 0.89, "learning_rate": 0.00011611298346974804, "loss": 0.0662, "theoretical_loss": 3.356108005757479, "tokens_seen": 2920546304 }, { "epoch": 0.89, "learning_rate": 0.00011603273952816562, "loss": 0.0686, "theoretical_loss": 3.3560849168349907, "tokens_seen": 2920808448 }, { "epoch": 0.89, "learning_rate": 0.00011595249558658322, "loss": 0.0648, "theoretical_loss": 3.3560618305648204, "tokens_seen": 2921070592 }, { "epoch": 0.89, "learning_rate": 0.0001158722516450008, "loss": 0.0636, "theoretical_loss": 3.3560387469464255, "tokens_seen": 2921332736 }, { "epoch": 0.89, "learning_rate": 0.0001157920077034184, "loss": 0.0671, "theoretical_loss": 3.356015665979262, "tokens_seen": 2921594880 }, { "epoch": 0.89, "learning_rate": 0.00011571176376183598, "loss": 0.065, "theoretical_loss": 3.3559925876627896, "tokens_seen": 2921857024 }, { "epoch": 0.89, "learning_rate": 0.00011563151982025358, "loss": 0.0652, "theoretical_loss": 3.3559695119964648, "tokens_seen": 2922119168 }, { "epoch": 0.89, "learning_rate": 0.00011555127587867116, "loss": 0.0641, "theoretical_loss": 3.355946438979746, "tokens_seen": 2922381312 }, { "epoch": 0.89, "learning_rate": 0.00011547103193708876, "loss": 0.0648, "theoretical_loss": 3.3559233686120913, "tokens_seen": 2922643456 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.00019381623133085668, "objective/train/docs_used": 1062456, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2408287525177002, "objective/train/original_loss": 1.2408287525177002, "objective/train/theoretical_loss": 3.3559003008929587, "objective/train/tokens_used": 2943365600, "objective/train/value_avg": -0.00800323486328125, "objective/train/value_loss": 0.00016172441246453673, "objective/train/value_max": -2.7120113372802734e-05, "objective/train/value_min": -0.5068359375, "objective/train/value_reward_corr": 0.7553830884393562, "objective/train/value_std": 0.0154571533203125, "objective/train/weight_avg": 1.0002673864364624, "objective/train/weighted_lm_loss": 1.2412976026535034, "objective/train/weights_max": 1.332244634628296, "objective/train/weights_min": 0.3701034188270569, "theoretical_loss": 3.3559003008929587, "tokens_seen": 2922905600 }, { "epoch": 0.89, "learning_rate": 0.00011539078799550633, "loss": 0.0644, "theoretical_loss": 3.3559003008929587, "tokens_seen": 2922905600 }, { "epoch": 0.89, "learning_rate": 0.00011531054405392392, "loss": 0.0637, "theoretical_loss": 3.355877235821807, "tokens_seen": 2923167744 }, { "epoch": 0.89, "learning_rate": 0.00011523030011234152, "loss": 0.0647, "theoretical_loss": 3.355854173398095, "tokens_seen": 2923429888 }, { "epoch": 0.89, "learning_rate": 0.0001151500561707591, "loss": 0.067, "theoretical_loss": 3.3558311136212806, "tokens_seen": 2923692032 }, { "epoch": 0.89, "learning_rate": 0.0001150698122291767, "loss": 0.0648, "theoretical_loss": 3.355808056490824, "tokens_seen": 2923954176 }, { "epoch": 0.89, "learning_rate": 0.00011498956828759429, "loss": 0.0677, "theoretical_loss": 3.3557850020061832, "tokens_seen": 2924216320 }, { "epoch": 0.89, "learning_rate": 0.00011490932434601187, "loss": 0.066, "theoretical_loss": 3.355761950166818, "tokens_seen": 2924478464 }, { "epoch": 0.89, "learning_rate": 0.00011482908040442946, "loss": 0.0693, "theoretical_loss": 3.355738900972187, "tokens_seen": 2924740608 }, { "epoch": 0.89, "learning_rate": 0.00011474883646284706, "loss": 0.0644, "theoretical_loss": 3.355715854421751, "tokens_seen": 2925002752 }, { "epoch": 0.89, "learning_rate": 0.00011466859252126464, "loss": 0.0662, "theoretical_loss": 3.3556928105149693, "tokens_seen": 2925264896 }, { "epoch": 0.89, "learning_rate": 0.00011458834857968224, "loss": 0.0638, "theoretical_loss": 3.355669769251301, "tokens_seen": 2925527040 }, { "epoch": 0.89, "learning_rate": 0.00011450810463809983, "loss": 0.0648, "theoretical_loss": 3.355646730630207, "tokens_seen": 2925789184 }, { "epoch": 0.89, "learning_rate": 0.00011442786069651743, "loss": 0.0669, "theoretical_loss": 3.3556236946511473, "tokens_seen": 2926051328 }, { "epoch": 0.89, "learning_rate": 0.000114347616754935, "loss": 0.0621, "theoretical_loss": 3.355600661313582, "tokens_seen": 2926313472 }, { "epoch": 0.89, "learning_rate": 0.00011426737281335259, "loss": 0.0642, "theoretical_loss": 3.3555776306169722, "tokens_seen": 2926575616 }, { "epoch": 0.89, "learning_rate": 0.00011418712887177019, "loss": 0.0639, "theoretical_loss": 3.355554602560778, "tokens_seen": 2926837760 }, { "epoch": 0.89, "learning_rate": 0.00011410688493018777, "loss": 0.0642, "theoretical_loss": 3.3555315771444603, "tokens_seen": 2927099904 }, { "epoch": 0.89, "learning_rate": 0.00011402664098860537, "loss": 0.0669, "theoretical_loss": 3.3555085543674803, "tokens_seen": 2927362048 }, { "epoch": 0.89, "learning_rate": 0.00011394639704702294, "loss": 0.0669, "theoretical_loss": 3.355485534229299, "tokens_seen": 2927624192 }, { "epoch": 0.89, "learning_rate": 0.00011386615310544054, "loss": 0.064, "theoretical_loss": 3.3554625167293777, "tokens_seen": 2927886336 }, { "epoch": 0.89, "learning_rate": 0.00011378590916385813, "loss": 0.0655, "theoretical_loss": 3.355439501867178, "tokens_seen": 2928148480 }, { "epoch": 0.89, "learning_rate": 0.00011370566522227573, "loss": 0.0635, "theoretical_loss": 3.3554164896421614, "tokens_seen": 2928410624 }, { "epoch": 0.89, "learning_rate": 0.00011362542128069331, "loss": 0.066, "theoretical_loss": 3.35539348005379, "tokens_seen": 2928672768 }, { "epoch": 0.89, "learning_rate": 0.00011354517733911091, "loss": 0.0656, "theoretical_loss": 3.3553704731015253, "tokens_seen": 2928934912 }, { "epoch": 0.89, "learning_rate": 0.0001134649333975285, "loss": 0.063, "theoretical_loss": 3.3553474687848297, "tokens_seen": 2929197056 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.0012888078344985843, "objective/train/docs_used": 1064613, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.31832754611969, "objective/train/original_loss": 1.31832754611969, "objective/train/theoretical_loss": 3.355324467103165, "objective/train/tokens_used": 2949919200, "objective/train/value_avg": -0.00921630859375, "objective/train/value_loss": 0.0002458643866702914, "objective/train/value_max": -2.9981136322021484e-05, "objective/train/value_min": -0.345703125, "objective/train/value_reward_corr": 0.6926176165417648, "objective/train/value_std": 0.015594482421875, "objective/train/weight_avg": 1.0013985633850098, "objective/train/weighted_lm_loss": 1.319889783859253, "objective/train/weights_max": 1.2300209999084473, "objective/train/weights_min": 0.3723408579826355, "theoretical_loss": 3.355324467103165, "tokens_seen": 2929459200 }, { "epoch": 0.89, "learning_rate": 0.00011338468945594607, "loss": 0.0656, "theoretical_loss": 3.355324467103165, "tokens_seen": 2929459200 }, { "epoch": 0.89, "learning_rate": 0.00011330444551436367, "loss": 0.0661, "theoretical_loss": 3.3553014680559943, "tokens_seen": 2929721344 }, { "epoch": 0.89, "learning_rate": 0.00011322420157278125, "loss": 0.0675, "theoretical_loss": 3.3552784716427797, "tokens_seen": 2929983488 }, { "epoch": 0.89, "learning_rate": 0.00011314395763119885, "loss": 0.0626, "theoretical_loss": 3.355255477862984, "tokens_seen": 2930245632 }, { "epoch": 0.89, "learning_rate": 0.00011306371368961644, "loss": 0.0649, "theoretical_loss": 3.35523248671607, "tokens_seen": 2930507776 }, { "epoch": 0.89, "learning_rate": 0.00011298346974803404, "loss": 0.068, "theoretical_loss": 3.3552094982015013, "tokens_seen": 2930769920 }, { "epoch": 0.89, "learning_rate": 0.00011290322580645161, "loss": 0.0665, "theoretical_loss": 3.35518651231874, "tokens_seen": 2931032064 }, { "epoch": 0.89, "learning_rate": 0.0001128229818648692, "loss": 0.0656, "theoretical_loss": 3.3551635290672506, "tokens_seen": 2931294208 }, { "epoch": 0.89, "learning_rate": 0.00011274273792328679, "loss": 0.0674, "theoretical_loss": 3.3551405484464962, "tokens_seen": 2931556352 }, { "epoch": 0.89, "learning_rate": 0.00011266249398170439, "loss": 0.0622, "theoretical_loss": 3.35511757045594, "tokens_seen": 2931818496 }, { "epoch": 0.89, "learning_rate": 0.00011258225004012198, "loss": 0.0631, "theoretical_loss": 3.3550945950950473, "tokens_seen": 2932080640 }, { "epoch": 0.89, "learning_rate": 0.00011250200609853958, "loss": 0.0632, "theoretical_loss": 3.35507162236328, "tokens_seen": 2932342784 }, { "epoch": 0.89, "learning_rate": 0.00011242176215695715, "loss": 0.0668, "theoretical_loss": 3.3550486522601037, "tokens_seen": 2932604928 }, { "epoch": 0.89, "learning_rate": 0.00011234151821537473, "loss": 0.0666, "theoretical_loss": 3.355025684784982, "tokens_seen": 2932867072 }, { "epoch": 0.89, "learning_rate": 0.00011226127427379233, "loss": 0.0638, "theoretical_loss": 3.35500271993738, "tokens_seen": 2933129216 }, { "epoch": 0.89, "learning_rate": 0.00011218103033220992, "loss": 0.0634, "theoretical_loss": 3.354979757716762, "tokens_seen": 2933391360 }, { "epoch": 0.89, "learning_rate": 0.00011210078639062752, "loss": 0.0636, "theoretical_loss": 3.3549567981225925, "tokens_seen": 2933653504 }, { "epoch": 0.89, "learning_rate": 0.0001120205424490451, "loss": 0.0651, "theoretical_loss": 3.354933841154337, "tokens_seen": 2933915648 }, { "epoch": 0.89, "learning_rate": 0.00011194029850746269, "loss": 0.0654, "theoretical_loss": 3.35491088681146, "tokens_seen": 2934177792 }, { "epoch": 0.89, "learning_rate": 0.00011186005456588027, "loss": 0.0654, "theoretical_loss": 3.3548879350934273, "tokens_seen": 2934439936 }, { "epoch": 0.89, "learning_rate": 0.00011177981062429787, "loss": 0.065, "theoretical_loss": 3.3548649859997033, "tokens_seen": 2934702080 }, { "epoch": 0.89, "learning_rate": 0.00011169956668271546, "loss": 0.0658, "theoretical_loss": 3.354842039529755, "tokens_seen": 2934964224 }, { "epoch": 0.89, "learning_rate": 0.00011161932274113306, "loss": 0.0658, "theoretical_loss": 3.3548190956830473, "tokens_seen": 2935226368 }, { "epoch": 0.89, "learning_rate": 0.00011153907879955064, "loss": 0.0631, "theoretical_loss": 3.354796154459046, "tokens_seen": 2935488512 }, { "epoch": 0.89, "learning_rate": 0.00011145883485796821, "loss": 0.0643, "theoretical_loss": 3.354773215857217, "tokens_seen": 2935750656 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.0004477580660022795, "objective/train/docs_used": 1067139, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2940055131912231, "objective/train/original_loss": 1.2940055131912231, "objective/train/theoretical_loss": 3.354750279877027, "objective/train/tokens_used": 2956472800, "objective/train/value_avg": -0.008087158203125, "objective/train/value_loss": 0.00039907018071971834, "objective/train/value_max": -3.8504600524902344e-05, "objective/train/value_min": -0.497802734375, "objective/train/value_reward_corr": 0.6092954334011763, "objective/train/value_std": 0.01544952392578125, "objective/train/weight_avg": 1.000596284866333, "objective/train/weighted_lm_loss": 1.2948415279388428, "objective/train/weights_max": 1.4681942462921143, "objective/train/weights_min": 0.05272703617811203, "theoretical_loss": 3.354750279877027, "tokens_seen": 2936012800 }, { "epoch": 0.89, "learning_rate": 0.00011137859091638581, "loss": 0.0647, "theoretical_loss": 3.354750279877027, "tokens_seen": 2936012800 }, { "epoch": 0.89, "learning_rate": 0.0001112983469748034, "loss": 0.0648, "theoretical_loss": 3.3547273465179424, "tokens_seen": 2936274944 }, { "epoch": 0.89, "learning_rate": 0.000111218103033221, "loss": 0.0622, "theoretical_loss": 3.354704415779429, "tokens_seen": 2936537088 }, { "epoch": 0.89, "learning_rate": 0.00011113785909163858, "loss": 0.0635, "theoretical_loss": 3.3546814876609536, "tokens_seen": 2936799232 }, { "epoch": 0.89, "learning_rate": 0.00011105761515005618, "loss": 0.0664, "theoretical_loss": 3.354658562161984, "tokens_seen": 2937061376 }, { "epoch": 0.89, "learning_rate": 0.00011097737120847375, "loss": 0.0647, "theoretical_loss": 3.354635639281986, "tokens_seen": 2937323520 }, { "epoch": 0.89, "learning_rate": 0.00011089712726689135, "loss": 0.0656, "theoretical_loss": 3.3546127190204276, "tokens_seen": 2937585664 }, { "epoch": 0.89, "learning_rate": 0.00011081688332530894, "loss": 0.0664, "theoretical_loss": 3.354589801376775, "tokens_seen": 2937847808 }, { "epoch": 0.89, "learning_rate": 0.00011073663938372654, "loss": 0.0641, "theoretical_loss": 3.3545668863504963, "tokens_seen": 2938109952 }, { "epoch": 0.89, "learning_rate": 0.00011065639544214412, "loss": 0.066, "theoretical_loss": 3.3545439739410594, "tokens_seen": 2938372096 }, { "epoch": 0.89, "learning_rate": 0.00011057615150056171, "loss": 0.0607, "theoretical_loss": 3.3545210641479315, "tokens_seen": 2938634240 }, { "epoch": 0.89, "learning_rate": 0.0001104959075589793, "loss": 0.0643, "theoretical_loss": 3.354498156970581, "tokens_seen": 2938896384 }, { "epoch": 0.89, "learning_rate": 0.00011041566361739688, "loss": 0.0653, "theoretical_loss": 3.3544752524084753, "tokens_seen": 2939158528 }, { "epoch": 0.89, "learning_rate": 0.00011033541967581448, "loss": 0.0655, "theoretical_loss": 3.3544523504610826, "tokens_seen": 2939420672 }, { "epoch": 0.89, "learning_rate": 0.00011025517573423206, "loss": 0.0662, "theoretical_loss": 3.354429451127872, "tokens_seen": 2939682816 }, { "epoch": 0.89, "learning_rate": 0.00011017493179264966, "loss": 0.0669, "theoretical_loss": 3.354406554408312, "tokens_seen": 2939944960 }, { "epoch": 0.89, "learning_rate": 0.00011009468785106725, "loss": 0.0638, "theoretical_loss": 3.3543836603018704, "tokens_seen": 2940207104 }, { "epoch": 0.89, "learning_rate": 0.00011001444390948483, "loss": 0.0655, "theoretical_loss": 3.354360768808017, "tokens_seen": 2940469248 }, { "epoch": 0.89, "learning_rate": 0.00010993419996790242, "loss": 0.0655, "theoretical_loss": 3.35433787992622, "tokens_seen": 2940731392 }, { "epoch": 0.89, "learning_rate": 0.00010985395602632002, "loss": 0.0667, "theoretical_loss": 3.354314993655949, "tokens_seen": 2940993536 }, { "epoch": 0.89, "learning_rate": 0.0001097737120847376, "loss": 0.064, "theoretical_loss": 3.354292109996673, "tokens_seen": 2941255680 }, { "epoch": 0.89, "learning_rate": 0.0001096934681431552, "loss": 0.0646, "theoretical_loss": 3.354269228947862, "tokens_seen": 2941517824 }, { "epoch": 0.89, "learning_rate": 0.00010961322420157279, "loss": 0.0663, "theoretical_loss": 3.354246350508985, "tokens_seen": 2941779968 }, { "epoch": 0.89, "learning_rate": 0.00010953298025999036, "loss": 0.0628, "theoretical_loss": 3.354223474679512, "tokens_seen": 2942042112 }, { "epoch": 0.89, "learning_rate": 0.00010945273631840796, "loss": 0.0648, "theoretical_loss": 3.354200601458913, "tokens_seen": 2942304256 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.000351957103703171, "objective/train/docs_used": 1069745, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.43468177318573, "objective/train/original_loss": 1.43468177318573, "objective/train/theoretical_loss": 3.354177730846658, "objective/train/tokens_used": 2963026400, "objective/train/value_avg": -0.007633209228515625, "objective/train/value_loss": 0.0003218376077711582, "objective/train/value_max": -3.349781036376953e-05, "objective/train/value_min": -0.71435546875, "objective/train/value_reward_corr": 0.7403361391563008, "objective/train/value_std": 0.0193939208984375, "objective/train/weight_avg": 1.0004934072494507, "objective/train/weighted_lm_loss": 1.4347542524337769, "objective/train/weights_max": 1.8865798711776733, "objective/train/weights_min": 0.23063500225543976, "theoretical_loss": 3.354177730846658, "tokens_seen": 2942566400 }, { "epoch": 0.89, "learning_rate": 0.00010937249237682555, "loss": 0.0688, "theoretical_loss": 3.354177730846658, "tokens_seen": 2942566400 }, { "epoch": 0.89, "learning_rate": 0.00010929224843524314, "loss": 0.0654, "theoretical_loss": 3.354154862842217, "tokens_seen": 2942828544 }, { "epoch": 0.89, "learning_rate": 0.00010921200449366073, "loss": 0.0664, "theoretical_loss": 3.354131997445061, "tokens_seen": 2943090688 }, { "epoch": 0.89, "learning_rate": 0.00010913176055207833, "loss": 0.0644, "theoretical_loss": 3.3541091346546597, "tokens_seen": 2943352832 }, { "epoch": 0.89, "learning_rate": 0.0001090515166104959, "loss": 0.0651, "theoretical_loss": 3.3540862744704842, "tokens_seen": 2943614976 }, { "epoch": 0.89, "learning_rate": 0.0001089712726689135, "loss": 0.0661, "theoretical_loss": 3.3540634168920063, "tokens_seen": 2943877120 }, { "epoch": 0.89, "learning_rate": 0.00010889102872733109, "loss": 0.0668, "theoretical_loss": 3.354040561918695, "tokens_seen": 2944139264 }, { "epoch": 0.89, "learning_rate": 0.00010881078478574869, "loss": 0.0681, "theoretical_loss": 3.354017709550023, "tokens_seen": 2944401408 }, { "epoch": 0.89, "learning_rate": 0.00010873054084416627, "loss": 0.0643, "theoretical_loss": 3.353994859785461, "tokens_seen": 2944663552 }, { "epoch": 0.89, "learning_rate": 0.00010865029690258386, "loss": 0.0668, "theoretical_loss": 3.353972012624481, "tokens_seen": 2944925696 }, { "epoch": 0.89, "learning_rate": 0.00010857005296100144, "loss": 0.0654, "theoretical_loss": 3.3539491680665545, "tokens_seen": 2945187840 }, { "epoch": 0.89, "learning_rate": 0.00010848980901941903, "loss": 0.066, "theoretical_loss": 3.3539263261111523, "tokens_seen": 2945449984 }, { "epoch": 0.89, "learning_rate": 0.00010840956507783663, "loss": 0.067, "theoretical_loss": 3.3539034867577473, "tokens_seen": 2945712128 }, { "epoch": 0.89, "learning_rate": 0.00010832932113625421, "loss": 0.0655, "theoretical_loss": 3.353880650005811, "tokens_seen": 2945974272 }, { "epoch": 0.89, "learning_rate": 0.00010824907719467181, "loss": 0.0649, "theoretical_loss": 3.3538578158548167, "tokens_seen": 2946236416 }, { "epoch": 0.89, "learning_rate": 0.0001081688332530894, "loss": 0.0662, "theoretical_loss": 3.3538349843042354, "tokens_seen": 2946498560 }, { "epoch": 0.89, "learning_rate": 0.00010808858931150698, "loss": 0.0654, "theoretical_loss": 3.3538121553535407, "tokens_seen": 2946760704 }, { "epoch": 0.89, "learning_rate": 0.00010800834536992457, "loss": 0.0671, "theoretical_loss": 3.353789329002205, "tokens_seen": 2947022848 }, { "epoch": 0.89, "learning_rate": 0.00010792810142834217, "loss": 0.0656, "theoretical_loss": 3.3537665052497005, "tokens_seen": 2947284992 }, { "epoch": 0.89, "learning_rate": 0.00010784785748675975, "loss": 0.0645, "theoretical_loss": 3.353743684095501, "tokens_seen": 2947547136 }, { "epoch": 0.89, "learning_rate": 0.00010776761354517735, "loss": 0.0649, "theoretical_loss": 3.3537208655390796, "tokens_seen": 2947809280 }, { "epoch": 0.89, "learning_rate": 0.00010768736960359494, "loss": 0.0653, "theoretical_loss": 3.353698049579909, "tokens_seen": 2948071424 }, { "epoch": 0.89, "learning_rate": 0.00010760712566201251, "loss": 0.0631, "theoretical_loss": 3.3536752362174633, "tokens_seen": 2948333568 }, { "epoch": 0.89, "learning_rate": 0.00010752688172043011, "loss": 0.0629, "theoretical_loss": 3.3536524254512163, "tokens_seen": 2948595712 }, { "epoch": 0.89, "learning_rate": 0.00010744663777884769, "loss": 0.0651, "theoretical_loss": 3.353629617280641, "tokens_seen": 2948857856 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.0009776019724085927, "objective/train/docs_used": 1072071, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.184030532836914, "objective/train/original_loss": 1.1840304136276245, "objective/train/theoretical_loss": 3.3536068117052116, "objective/train/tokens_used": 2969580000, "objective/train/value_avg": -0.0084228515625, "objective/train/value_loss": 0.0003648189886007458, "objective/train/value_max": -2.0325183868408203e-05, "objective/train/value_min": -0.89697265625, "objective/train/value_reward_corr": 0.6921030821127245, "objective/train/value_std": 0.02081298828125, "objective/train/weight_avg": 1.0011451244354248, "objective/train/weighted_lm_loss": 1.1850285530090332, "objective/train/weights_max": 2.162311553955078, "objective/train/weights_min": 0.37333929538726807, "theoretical_loss": 3.3536068117052116, "tokens_seen": 2949120000 }, { "epoch": 0.89, "learning_rate": 0.00010736639383726529, "loss": 0.0643, "theoretical_loss": 3.3536068117052116, "tokens_seen": 2949120000 }, { "epoch": 0.89, "learning_rate": 0.00010728614989568288, "loss": 0.0615, "theoretical_loss": 3.3535840087244027, "tokens_seen": 2949382144 }, { "epoch": 0.89, "learning_rate": 0.00010720590595410048, "loss": 0.0661, "theoretical_loss": 3.3535612083376876, "tokens_seen": 2949644288 }, { "epoch": 0.89, "learning_rate": 0.00010712566201251805, "loss": 0.0618, "theoretical_loss": 3.353538410544542, "tokens_seen": 2949906432 }, { "epoch": 0.89, "learning_rate": 0.00010704541807093565, "loss": 0.0645, "theoretical_loss": 3.3535156153444388, "tokens_seen": 2950168576 }, { "epoch": 0.89, "learning_rate": 0.00010696517412935323, "loss": 0.0647, "theoretical_loss": 3.353492822736854, "tokens_seen": 2950430720 }, { "epoch": 0.89, "learning_rate": 0.00010688493018777083, "loss": 0.0629, "theoretical_loss": 3.353470032721262, "tokens_seen": 2950692864 }, { "epoch": 0.89, "learning_rate": 0.00010680468624618842, "loss": 0.066, "theoretical_loss": 3.3534472452971373, "tokens_seen": 2950955008 }, { "epoch": 0.89, "learning_rate": 0.000106724442304606, "loss": 0.0622, "theoretical_loss": 3.3534244604639563, "tokens_seen": 2951217152 }, { "epoch": 0.89, "learning_rate": 0.00010664419836302359, "loss": 0.064, "theoretical_loss": 3.353401678221193, "tokens_seen": 2951479296 }, { "epoch": 0.89, "learning_rate": 0.00010656395442144117, "loss": 0.0636, "theoretical_loss": 3.3533788985683235, "tokens_seen": 2951741440 }, { "epoch": 0.89, "learning_rate": 0.00010648371047985877, "loss": 0.0677, "theoretical_loss": 3.3533561215048233, "tokens_seen": 2952003584 }, { "epoch": 0.89, "learning_rate": 0.00010640346653827636, "loss": 0.0635, "theoretical_loss": 3.3533333470301683, "tokens_seen": 2952265728 }, { "epoch": 0.89, "learning_rate": 0.00010632322259669396, "loss": 0.0658, "theoretical_loss": 3.3533105751438343, "tokens_seen": 2952527872 }, { "epoch": 0.89, "learning_rate": 0.00010624297865511154, "loss": 0.0631, "theoretical_loss": 3.3532878058452975, "tokens_seen": 2952790016 }, { "epoch": 0.89, "learning_rate": 0.00010616273471352913, "loss": 0.0651, "theoretical_loss": 3.3532650391340337, "tokens_seen": 2953052160 }, { "epoch": 0.89, "learning_rate": 0.00010608249077194671, "loss": 0.0654, "theoretical_loss": 3.35324227500952, "tokens_seen": 2953314304 }, { "epoch": 0.9, "learning_rate": 0.00010600224683036431, "loss": 0.0614, "theoretical_loss": 3.353219513471232, "tokens_seen": 2953576448 }, { "epoch": 0.9, "learning_rate": 0.0001059220028887819, "loss": 0.0642, "theoretical_loss": 3.3531967545186467, "tokens_seen": 2953838592 }, { "epoch": 0.9, "learning_rate": 0.00010584175894719948, "loss": 0.0649, "theoretical_loss": 3.3531739981512416, "tokens_seen": 2954100736 }, { "epoch": 0.9, "learning_rate": 0.00010576151500561708, "loss": 0.063, "theoretical_loss": 3.353151244368493, "tokens_seen": 2954362880 }, { "epoch": 0.9, "learning_rate": 0.00010568127106403466, "loss": 0.0627, "theoretical_loss": 3.353128493169878, "tokens_seen": 2954625024 }, { "epoch": 0.9, "learning_rate": 0.00010560102712245225, "loss": 0.0625, "theoretical_loss": 3.353105744554875, "tokens_seen": 2954887168 }, { "epoch": 0.9, "learning_rate": 0.00010552078318086984, "loss": 0.0641, "theoretical_loss": 3.3530829985229595, "tokens_seen": 2955149312 }, { "epoch": 0.9, "learning_rate": 0.00010544053923928744, "loss": 0.0638, "theoretical_loss": 3.3530602550736104, "tokens_seen": 2955411456 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.00048578670248389244, "objective/train/docs_used": 1074453, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3792080879211426, "objective/train/original_loss": 1.3792080879211426, "objective/train/theoretical_loss": 3.3530375142063056, "objective/train/tokens_used": 2976133600, "objective/train/value_avg": -0.0086669921875, "objective/train/value_loss": 0.00022539905330631882, "objective/train/value_max": -2.8848648071289062e-05, "objective/train/value_min": -0.93212890625, "objective/train/value_reward_corr": 0.7546802600898205, "objective/train/value_std": 0.016845703125, "objective/train/weight_avg": 1.000587821006775, "objective/train/weighted_lm_loss": 1.3799760341644287, "objective/train/weights_max": 1.2848964929580688, "objective/train/weights_min": 0.3680596351623535, "theoretical_loss": 3.3530375142063056, "tokens_seen": 2955673600 }, { "epoch": 0.9, "learning_rate": 0.00010536029529770502, "loss": 0.0664, "theoretical_loss": 3.3530375142063056, "tokens_seen": 2955673600 }, { "epoch": 0.9, "learning_rate": 0.00010528005135612262, "loss": 0.0648, "theoretical_loss": 3.353014775920522, "tokens_seen": 2955935744 }, { "epoch": 0.9, "learning_rate": 0.0001051998074145402, "loss": 0.065, "theoretical_loss": 3.3529920402157387, "tokens_seen": 2956197888 }, { "epoch": 0.9, "learning_rate": 0.0001051195634729578, "loss": 0.0638, "theoretical_loss": 3.352969307091433, "tokens_seen": 2956460032 }, { "epoch": 0.9, "learning_rate": 0.00010503931953137538, "loss": 0.0651, "theoretical_loss": 3.352946576547084, "tokens_seen": 2956722176 }, { "epoch": 0.9, "learning_rate": 0.00010495907558979298, "loss": 0.0648, "theoretical_loss": 3.3529238485821695, "tokens_seen": 2956984320 }, { "epoch": 0.9, "learning_rate": 0.00010487883164821056, "loss": 0.0675, "theoretical_loss": 3.352901123196169, "tokens_seen": 2957246464 }, { "epoch": 0.9, "learning_rate": 0.00010479858770662815, "loss": 0.0664, "theoretical_loss": 3.3528784003885606, "tokens_seen": 2957508608 }, { "epoch": 0.9, "learning_rate": 0.00010471834376504575, "loss": 0.0614, "theoretical_loss": 3.3528556801588234, "tokens_seen": 2957770752 }, { "epoch": 0.9, "learning_rate": 0.00010463809982346332, "loss": 0.0632, "theoretical_loss": 3.3528329625064366, "tokens_seen": 2958032896 }, { "epoch": 0.9, "learning_rate": 0.00010455785588188092, "loss": 0.0654, "theoretical_loss": 3.3528102474308796, "tokens_seen": 2958295040 }, { "epoch": 0.9, "learning_rate": 0.0001044776119402985, "loss": 0.0662, "theoretical_loss": 3.3527875349316316, "tokens_seen": 2958557184 }, { "epoch": 0.9, "learning_rate": 0.0001043973679987161, "loss": 0.0651, "theoretical_loss": 3.3527648250081725, "tokens_seen": 2958819328 }, { "epoch": 0.9, "learning_rate": 0.00010431712405713369, "loss": 0.0638, "theoretical_loss": 3.3527421176599814, "tokens_seen": 2959081472 }, { "epoch": 0.9, "learning_rate": 0.00010423688011555129, "loss": 0.0658, "theoretical_loss": 3.3527194128865387, "tokens_seen": 2959343616 }, { "epoch": 0.9, "learning_rate": 0.00010415663617396886, "loss": 0.0648, "theoretical_loss": 3.352696710687324, "tokens_seen": 2959605760 }, { "epoch": 0.9, "learning_rate": 0.00010407639223238646, "loss": 0.0655, "theoretical_loss": 3.352674011061818, "tokens_seen": 2959867904 }, { "epoch": 0.9, "learning_rate": 0.00010399614829080405, "loss": 0.0644, "theoretical_loss": 3.352651314009501, "tokens_seen": 2960130048 }, { "epoch": 0.9, "learning_rate": 0.00010391590434922163, "loss": 0.0656, "theoretical_loss": 3.3526286195298525, "tokens_seen": 2960392192 }, { "epoch": 0.9, "learning_rate": 0.00010383566040763923, "loss": 0.0629, "theoretical_loss": 3.352605927622354, "tokens_seen": 2960654336 }, { "epoch": 0.9, "learning_rate": 0.00010375541646605682, "loss": 0.0647, "theoretical_loss": 3.352583238286486, "tokens_seen": 2960916480 }, { "epoch": 0.9, "learning_rate": 0.0001036751725244744, "loss": 0.0635, "theoretical_loss": 3.35256055152173, "tokens_seen": 2961178624 }, { "epoch": 0.9, "learning_rate": 0.00010359492858289199, "loss": 0.062, "theoretical_loss": 3.352537867327566, "tokens_seen": 2961440768 }, { "epoch": 0.9, "learning_rate": 0.00010351468464130959, "loss": 0.0634, "theoretical_loss": 3.3525151857034765, "tokens_seen": 2961702912 }, { "epoch": 0.9, "learning_rate": 0.00010343444069972717, "loss": 0.0652, "theoretical_loss": 3.3524925066489417, "tokens_seen": 2961965056 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.000486421340610832, "objective/train/docs_used": 1076860, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.347232460975647, "objective/train/original_loss": 1.3472323417663574, "objective/train/theoretical_loss": 3.3524698301634435, "objective/train/tokens_used": 2982687200, "objective/train/value_avg": -0.00687408447265625, "objective/train/value_loss": 0.000281084852758795, "objective/train/value_max": -3.534555435180664e-05, "objective/train/value_min": -0.646484375, "objective/train/value_reward_corr": 0.6846278441731948, "objective/train/value_std": 0.01476287841796875, "objective/train/weight_avg": 1.0006098747253418, "objective/train/weighted_lm_loss": 1.3480408191680908, "objective/train/weights_max": 1.5544962882995605, "objective/train/weights_min": 0.3717136085033417, "theoretical_loss": 3.3524698301634435, "tokens_seen": 2962227200 }, { "epoch": 0.9, "learning_rate": 0.00010335419675814477, "loss": 0.0652, "theoretical_loss": 3.3524698301634435, "tokens_seen": 2962227200 }, { "epoch": 0.9, "learning_rate": 0.00010327395281656236, "loss": 0.0663, "theoretical_loss": 3.352447156246464, "tokens_seen": 2962489344 }, { "epoch": 0.9, "learning_rate": 0.00010319370887497994, "loss": 0.0643, "theoretical_loss": 3.3524244848974845, "tokens_seen": 2962751488 }, { "epoch": 0.9, "learning_rate": 0.00010311346493339753, "loss": 0.0656, "theoretical_loss": 3.352401816115987, "tokens_seen": 2963013632 }, { "epoch": 0.9, "learning_rate": 0.00010303322099181513, "loss": 0.0646, "theoretical_loss": 3.352379149901454, "tokens_seen": 2963275776 }, { "epoch": 0.9, "learning_rate": 0.00010295297705023271, "loss": 0.0642, "theoretical_loss": 3.3523564862533677, "tokens_seen": 2963537920 }, { "epoch": 0.9, "learning_rate": 0.0001028727331086503, "loss": 0.0645, "theoretical_loss": 3.3523338251712103, "tokens_seen": 2963800064 }, { "epoch": 0.9, "learning_rate": 0.0001027924891670679, "loss": 0.0637, "theoretical_loss": 3.3523111666544643, "tokens_seen": 2964062208 }, { "epoch": 0.9, "learning_rate": 0.00010271224522548547, "loss": 0.0634, "theoretical_loss": 3.352288510702613, "tokens_seen": 2964324352 }, { "epoch": 0.9, "learning_rate": 0.00010263200128390307, "loss": 0.0655, "theoretical_loss": 3.3522658573151385, "tokens_seen": 2964586496 }, { "epoch": 0.9, "learning_rate": 0.00010255175734232065, "loss": 0.0654, "theoretical_loss": 3.3522432064915244, "tokens_seen": 2964848640 }, { "epoch": 0.9, "learning_rate": 0.00010247151340073825, "loss": 0.0641, "theoretical_loss": 3.3522205582312536, "tokens_seen": 2965110784 }, { "epoch": 0.9, "learning_rate": 0.00010239126945915584, "loss": 0.0652, "theoretical_loss": 3.3521979125338097, "tokens_seen": 2965372928 }, { "epoch": 0.9, "learning_rate": 0.00010231102551757344, "loss": 0.0653, "theoretical_loss": 3.3521752693986757, "tokens_seen": 2965635072 }, { "epoch": 0.9, "learning_rate": 0.00010223078157599101, "loss": 0.0636, "theoretical_loss": 3.3521526288253356, "tokens_seen": 2965897216 }, { "epoch": 0.9, "learning_rate": 0.00010215053763440861, "loss": 0.064, "theoretical_loss": 3.352129990813273, "tokens_seen": 2966159360 }, { "epoch": 0.9, "learning_rate": 0.00010207029369282619, "loss": 0.0643, "theoretical_loss": 3.3521073553619725, "tokens_seen": 2966421504 }, { "epoch": 0.9, "learning_rate": 0.00010199004975124378, "loss": 0.0619, "theoretical_loss": 3.352084722470917, "tokens_seen": 2966683648 }, { "epoch": 0.9, "learning_rate": 0.00010190980580966138, "loss": 0.0631, "theoretical_loss": 3.352062092139591, "tokens_seen": 2966945792 }, { "epoch": 0.9, "learning_rate": 0.00010182956186807896, "loss": 0.0615, "theoretical_loss": 3.35203946436748, "tokens_seen": 2967207936 }, { "epoch": 0.9, "learning_rate": 0.00010174931792649655, "loss": 0.0657, "theoretical_loss": 3.3520168391540675, "tokens_seen": 2967470080 }, { "epoch": 0.9, "learning_rate": 0.00010166907398491413, "loss": 0.0639, "theoretical_loss": 3.3519942164988383, "tokens_seen": 2967732224 }, { "epoch": 0.9, "learning_rate": 0.00010158883004333173, "loss": 0.067, "theoretical_loss": 3.3519715964012775, "tokens_seen": 2967994368 }, { "epoch": 0.9, "learning_rate": 0.00010150858610174932, "loss": 0.0646, "theoretical_loss": 3.3519489788608694, "tokens_seen": 2968256512 }, { "epoch": 0.9, "learning_rate": 0.00010142834216016692, "loss": 0.0648, "theoretical_loss": 3.3519263638770997, "tokens_seen": 2968518656 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.0002028256276389584, "objective/train/docs_used": 1079221, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2540658712387085, "objective/train/original_loss": 1.254065752029419, "objective/train/theoretical_loss": 3.351903751449454, "objective/train/tokens_used": 2989240800, "objective/train/value_avg": -0.007740020751953125, "objective/train/value_loss": 0.00018627307144924998, "objective/train/value_max": -2.0444393157958984e-05, "objective/train/value_min": -0.6416015625, "objective/train/value_reward_corr": 0.7760167147041913, "objective/train/value_std": 0.01641845703125, "objective/train/weight_avg": 1.000290036201477, "objective/train/weighted_lm_loss": 1.2542730569839478, "objective/train/weights_max": 1.706524133682251, "objective/train/weights_min": 0.36923545598983765, "theoretical_loss": 3.351903751449454, "tokens_seen": 2968780800 }, { "epoch": 0.9, "learning_rate": 0.0001013480982185845, "loss": 0.0631, "theoretical_loss": 3.351903751449454, "tokens_seen": 2968780800 }, { "epoch": 0.9, "learning_rate": 0.00010126785427700209, "loss": 0.0641, "theoretical_loss": 3.351881141577417, "tokens_seen": 2969042944 }, { "epoch": 0.9, "learning_rate": 0.00010118761033541967, "loss": 0.0631, "theoretical_loss": 3.3518585342604745, "tokens_seen": 2969305088 }, { "epoch": 0.9, "learning_rate": 0.00010110736639383726, "loss": 0.0626, "theoretical_loss": 3.3518359294981126, "tokens_seen": 2969567232 }, { "epoch": 0.9, "learning_rate": 0.00010102712245225486, "loss": 0.0626, "theoretical_loss": 3.3518133272898165, "tokens_seen": 2969829376 }, { "epoch": 0.9, "learning_rate": 0.00010094687851067244, "loss": 0.0653, "theoretical_loss": 3.3517907276350725, "tokens_seen": 2970091520 }, { "epoch": 0.9, "learning_rate": 0.00010086663456909004, "loss": 0.0642, "theoretical_loss": 3.351768130533367, "tokens_seen": 2970353664 }, { "epoch": 0.9, "learning_rate": 0.00010078639062750762, "loss": 0.0646, "theoretical_loss": 3.351745535984186, "tokens_seen": 2970615808 }, { "epoch": 0.9, "learning_rate": 0.00010070614668592521, "loss": 0.0641, "theoretical_loss": 3.3517229439870166, "tokens_seen": 2970877952 }, { "epoch": 0.9, "learning_rate": 0.0001006259027443428, "loss": 0.0631, "theoretical_loss": 3.3517003545413444, "tokens_seen": 2971140096 }, { "epoch": 0.9, "learning_rate": 0.0001005456588027604, "loss": 0.0654, "theoretical_loss": 3.351677767646657, "tokens_seen": 2971402240 }, { "epoch": 0.9, "learning_rate": 0.00010046541486117798, "loss": 0.0633, "theoretical_loss": 3.351655183302441, "tokens_seen": 2971664384 }, { "epoch": 0.9, "learning_rate": 0.00010038517091959558, "loss": 0.0639, "theoretical_loss": 3.3516326015081828, "tokens_seen": 2971926528 }, { "epoch": 0.9, "learning_rate": 0.00010030492697801316, "loss": 0.062, "theoretical_loss": 3.3516100222633707, "tokens_seen": 2972188672 }, { "epoch": 0.9, "learning_rate": 0.00010022468303643075, "loss": 0.0619, "theoretical_loss": 3.351587445567491, "tokens_seen": 2972450816 }, { "epoch": 0.9, "learning_rate": 0.00010014443909484834, "loss": 0.0651, "theoretical_loss": 3.351564871420033, "tokens_seen": 2972712960 }, { "epoch": 0.9, "learning_rate": 0.00010006419515326593, "loss": 0.0623, "theoretical_loss": 3.351542299820482, "tokens_seen": 2972975104 }, { "epoch": 0.9, "learning_rate": 9.998395121168352e-05, "loss": 0.0643, "theoretical_loss": 3.3515197307683273, "tokens_seen": 2973237248 }, { "epoch": 0.9, "learning_rate": 9.990370727010111e-05, "loss": 0.0651, "theoretical_loss": 3.3514971642630567, "tokens_seen": 2973499392 }, { "epoch": 0.9, "learning_rate": 9.98234633285187e-05, "loss": 0.065, "theoretical_loss": 3.3514746003041576, "tokens_seen": 2973761536 }, { "epoch": 0.9, "learning_rate": 9.974321938693628e-05, "loss": 0.0657, "theoretical_loss": 3.351452038891119, "tokens_seen": 2974023680 }, { "epoch": 0.9, "learning_rate": 9.966297544535388e-05, "loss": 0.0651, "theoretical_loss": 3.3514294800234286, "tokens_seen": 2974285824 }, { "epoch": 0.9, "learning_rate": 9.958273150377147e-05, "loss": 0.0659, "theoretical_loss": 3.3514069237005755, "tokens_seen": 2974547968 }, { "epoch": 0.9, "learning_rate": 9.950248756218906e-05, "loss": 0.0632, "theoretical_loss": 3.3513843699220485, "tokens_seen": 2974810112 }, { "epoch": 0.9, "learning_rate": 9.942224362060665e-05, "loss": 0.0642, "theoretical_loss": 3.351361818687336, "tokens_seen": 2975072256 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.0007520467042922974, "objective/train/docs_used": 1081640, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2578017711639404, "objective/train/original_loss": 1.2578016519546509, "objective/train/theoretical_loss": 3.3513392699959272, "objective/train/tokens_used": 2995794400, "objective/train/value_avg": -0.007770538330078125, "objective/train/value_loss": 0.00020639115246012807, "objective/train/value_max": -3.594160079956055e-05, "objective/train/value_min": -0.487060546875, "objective/train/value_reward_corr": 0.7141149475744621, "objective/train/value_std": 0.01519012451171875, "objective/train/weight_avg": 1.0008457899093628, "objective/train/weighted_lm_loss": 1.2584537267684937, "objective/train/weights_max": 1.2506414651870728, "objective/train/weights_min": 0.37683340907096863, "theoretical_loss": 3.3513392699959272, "tokens_seen": 2975334400 }, { "epoch": 0.9, "learning_rate": 9.934199967902424e-05, "loss": 0.0632, "theoretical_loss": 3.3513392699959272, "tokens_seen": 2975334400 }, { "epoch": 0.9, "learning_rate": 9.926175573744182e-05, "loss": 0.0646, "theoretical_loss": 3.3513167238473107, "tokens_seen": 2975596544 }, { "epoch": 0.9, "learning_rate": 9.918151179585941e-05, "loss": 0.0622, "theoretical_loss": 3.3512941802409766, "tokens_seen": 2975858688 }, { "epoch": 0.9, "learning_rate": 9.9101267854277e-05, "loss": 0.0657, "theoretical_loss": 3.3512716391764137, "tokens_seen": 2976120832 }, { "epoch": 0.9, "learning_rate": 9.902102391269459e-05, "loss": 0.0634, "theoretical_loss": 3.351249100653112, "tokens_seen": 2976382976 }, { "epoch": 0.9, "learning_rate": 9.894077997111219e-05, "loss": 0.0678, "theoretical_loss": 3.3512265646705615, "tokens_seen": 2976645120 }, { "epoch": 0.9, "learning_rate": 9.886053602952976e-05, "loss": 0.0642, "theoretical_loss": 3.3512040312282507, "tokens_seen": 2976907264 }, { "epoch": 0.9, "learning_rate": 9.878029208794736e-05, "loss": 0.0645, "theoretical_loss": 3.351181500325671, "tokens_seen": 2977169408 }, { "epoch": 0.9, "learning_rate": 9.870004814636495e-05, "loss": 0.0626, "theoretical_loss": 3.351158971962312, "tokens_seen": 2977431552 }, { "epoch": 0.9, "learning_rate": 9.861980420478255e-05, "loss": 0.0685, "theoretical_loss": 3.351136446137664, "tokens_seen": 2977693696 }, { "epoch": 0.9, "learning_rate": 9.853956026320013e-05, "loss": 0.0673, "theoretical_loss": 3.3511139228512175, "tokens_seen": 2977955840 }, { "epoch": 0.9, "learning_rate": 9.845931632161773e-05, "loss": 0.0635, "theoretical_loss": 3.3510914021024636, "tokens_seen": 2978217984 }, { "epoch": 0.9, "learning_rate": 9.83790723800353e-05, "loss": 0.0643, "theoretical_loss": 3.3510688838908917, "tokens_seen": 2978480128 }, { "epoch": 0.9, "learning_rate": 9.82988284384529e-05, "loss": 0.0671, "theoretical_loss": 3.3510463682159943, "tokens_seen": 2978742272 }, { "epoch": 0.9, "learning_rate": 9.821858449687049e-05, "loss": 0.0646, "theoretical_loss": 3.3510238550772615, "tokens_seen": 2979004416 }, { "epoch": 0.9, "learning_rate": 9.813834055528807e-05, "loss": 0.0623, "theoretical_loss": 3.3510013444741844, "tokens_seen": 2979266560 }, { "epoch": 0.9, "learning_rate": 9.805809661370567e-05, "loss": 0.0645, "theoretical_loss": 3.350978836406255, "tokens_seen": 2979528704 }, { "epoch": 0.9, "learning_rate": 9.797785267212326e-05, "loss": 0.0652, "theoretical_loss": 3.3509563308729646, "tokens_seen": 2979790848 }, { "epoch": 0.9, "learning_rate": 9.789760873054084e-05, "loss": 0.0629, "theoretical_loss": 3.350933827873804, "tokens_seen": 2980052992 }, { "epoch": 0.9, "learning_rate": 9.781736478895843e-05, "loss": 0.0669, "theoretical_loss": 3.3509113274082662, "tokens_seen": 2980315136 }, { "epoch": 0.9, "learning_rate": 9.773712084737603e-05, "loss": 0.0658, "theoretical_loss": 3.350888829475842, "tokens_seen": 2980577280 }, { "epoch": 0.9, "learning_rate": 9.765687690579361e-05, "loss": 0.0624, "theoretical_loss": 3.3508663340760245, "tokens_seen": 2980839424 }, { "epoch": 0.9, "learning_rate": 9.757663296421121e-05, "loss": 0.0677, "theoretical_loss": 3.3508438412083055, "tokens_seen": 2981101568 }, { "epoch": 0.9, "learning_rate": 9.74963890226288e-05, "loss": 0.0646, "theoretical_loss": 3.350821350872177, "tokens_seen": 2981363712 }, { "epoch": 0.9, "learning_rate": 9.741614508104638e-05, "loss": 0.0632, "theoretical_loss": 3.3507988630671317, "tokens_seen": 2981625856 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.0002680000616237521, "objective/train/docs_used": 1084154, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2389239072799683, "objective/train/original_loss": 1.2389240264892578, "objective/train/theoretical_loss": 3.3507763777926627, "objective/train/tokens_used": 3002348000, "objective/train/value_avg": -0.0086822509765625, "objective/train/value_loss": 0.00023992547357920557, "objective/train/value_max": -2.2292137145996094e-05, "objective/train/value_min": -0.9775390625, "objective/train/value_reward_corr": 0.7576909276402002, "objective/train/value_std": 0.019012451171875, "objective/train/weight_avg": 1.0003770589828491, "objective/train/weighted_lm_loss": 1.2388797998428345, "objective/train/weights_max": 1.7507752180099487, "objective/train/weights_min": 0.36900806427001953, "theoretical_loss": 3.3507763777926627, "tokens_seen": 2981888000 }, { "epoch": 0.9, "learning_rate": 9.733590113946397e-05, "loss": 0.0636, "theoretical_loss": 3.3507763777926627, "tokens_seen": 2981888000 }, { "epoch": 0.9, "learning_rate": 9.725565719788155e-05, "loss": 0.0629, "theoretical_loss": 3.3507538950482623, "tokens_seen": 2982150144 }, { "epoch": 0.9, "learning_rate": 9.717541325629915e-05, "loss": 0.0646, "theoretical_loss": 3.3507314148334233, "tokens_seen": 2982412288 }, { "epoch": 0.9, "learning_rate": 9.709516931471674e-05, "loss": 0.0653, "theoretical_loss": 3.3507089371476395, "tokens_seen": 2982674432 }, { "epoch": 0.9, "learning_rate": 9.701492537313434e-05, "loss": 0.0653, "theoretical_loss": 3.3506864619904038, "tokens_seen": 2982936576 }, { "epoch": 0.9, "learning_rate": 9.693468143155191e-05, "loss": 0.0624, "theoretical_loss": 3.3506639893612093, "tokens_seen": 2983198720 }, { "epoch": 0.9, "learning_rate": 9.685443748996951e-05, "loss": 0.0636, "theoretical_loss": 3.3506415192595496, "tokens_seen": 2983460864 }, { "epoch": 0.9, "learning_rate": 9.67741935483871e-05, "loss": 0.0626, "theoretical_loss": 3.350619051684919, "tokens_seen": 2983723008 }, { "epoch": 0.9, "learning_rate": 9.669394960680469e-05, "loss": 0.0632, "theoretical_loss": 3.3505965866368106, "tokens_seen": 2983985152 }, { "epoch": 0.9, "learning_rate": 9.661370566522228e-05, "loss": 0.0614, "theoretical_loss": 3.3505741241147184, "tokens_seen": 2984247296 }, { "epoch": 0.9, "learning_rate": 9.653346172363988e-05, "loss": 0.0625, "theoretical_loss": 3.350551664118137, "tokens_seen": 2984509440 }, { "epoch": 0.9, "learning_rate": 9.645321778205746e-05, "loss": 0.0638, "theoretical_loss": 3.35052920664656, "tokens_seen": 2984771584 }, { "epoch": 0.9, "learning_rate": 9.637297384047504e-05, "loss": 0.0642, "theoretical_loss": 3.350506751699483, "tokens_seen": 2985033728 }, { "epoch": 0.9, "learning_rate": 9.629272989889263e-05, "loss": 0.0621, "theoretical_loss": 3.3504842992763995, "tokens_seen": 2985295872 }, { "epoch": 0.9, "learning_rate": 9.621248595731022e-05, "loss": 0.0646, "theoretical_loss": 3.350461849376804, "tokens_seen": 2985558016 }, { "epoch": 0.9, "learning_rate": 9.613224201572782e-05, "loss": 0.0658, "theoretical_loss": 3.350439402000192, "tokens_seen": 2985820160 }, { "epoch": 0.9, "learning_rate": 9.60519980741454e-05, "loss": 0.0631, "theoretical_loss": 3.3504169571460585, "tokens_seen": 2986082304 }, { "epoch": 0.9, "learning_rate": 9.5971754132563e-05, "loss": 0.0648, "theoretical_loss": 3.3503945148138983, "tokens_seen": 2986344448 }, { "epoch": 0.91, "learning_rate": 9.589151019098058e-05, "loss": 0.0643, "theoretical_loss": 3.350372075003207, "tokens_seen": 2986606592 }, { "epoch": 0.91, "learning_rate": 9.581126624939817e-05, "loss": 0.0629, "theoretical_loss": 3.3503496377134794, "tokens_seen": 2986868736 }, { "epoch": 0.91, "learning_rate": 9.573102230781576e-05, "loss": 0.0629, "theoretical_loss": 3.3503272029442117, "tokens_seen": 2987130880 }, { "epoch": 0.91, "learning_rate": 9.565077836623336e-05, "loss": 0.0632, "theoretical_loss": 3.3503047706948994, "tokens_seen": 2987393024 }, { "epoch": 0.91, "learning_rate": 9.557053442465094e-05, "loss": 0.0617, "theoretical_loss": 3.3502823409650384, "tokens_seen": 2987655168 }, { "epoch": 0.91, "learning_rate": 9.549029048306854e-05, "loss": 0.0662, "theoretical_loss": 3.3502599137541247, "tokens_seen": 2987917312 }, { "epoch": 0.91, "learning_rate": 9.541004654148612e-05, "loss": 0.0656, "theoretical_loss": 3.350237489061654, "tokens_seen": 2988179456 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.00044840245391242206, "objective/train/docs_used": 1086412, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2609559297561646, "objective/train/original_loss": 1.260955810546875, "objective/train/theoretical_loss": 3.350215066887124, "objective/train/tokens_used": 3008901600, "objective/train/value_avg": -0.00728607177734375, "objective/train/value_loss": 0.00024669128470122814, "objective/train/value_max": -1.9669532775878906e-05, "objective/train/value_min": -0.939453125, "objective/train/value_reward_corr": 0.7119615799601327, "objective/train/value_std": 0.01641845703125, "objective/train/weight_avg": 1.0005605220794678, "objective/train/weighted_lm_loss": 1.261648178100586, "objective/train/weights_max": 2.1778111457824707, "objective/train/weights_min": 0.37020087242126465, "theoretical_loss": 3.350215066887124, "tokens_seen": 2988441600 }, { "epoch": 0.91, "learning_rate": 9.53298025999037e-05, "loss": 0.0647, "theoretical_loss": 3.350215066887124, "tokens_seen": 2988441600 }, { "epoch": 0.91, "learning_rate": 9.52495586583213e-05, "loss": 0.0631, "theoretical_loss": 3.3501926472300294, "tokens_seen": 2988703744 }, { "epoch": 0.91, "learning_rate": 9.516931471673889e-05, "loss": 0.0633, "theoretical_loss": 3.3501702300898675, "tokens_seen": 2988965888 }, { "epoch": 0.91, "learning_rate": 9.508907077515648e-05, "loss": 0.0638, "theoretical_loss": 3.3501478154661353, "tokens_seen": 2989228032 }, { "epoch": 0.91, "learning_rate": 9.500882683357407e-05, "loss": 0.064, "theoretical_loss": 3.3501254033583296, "tokens_seen": 2989490176 }, { "epoch": 0.91, "learning_rate": 9.492858289199166e-05, "loss": 0.0659, "theoretical_loss": 3.350102993765947, "tokens_seen": 2989752320 }, { "epoch": 0.91, "learning_rate": 9.484833895040924e-05, "loss": 0.0665, "theoretical_loss": 3.3500805866884855, "tokens_seen": 2990014464 }, { "epoch": 0.91, "learning_rate": 9.476809500882684e-05, "loss": 0.0599, "theoretical_loss": 3.350058182125441, "tokens_seen": 2990276608 }, { "epoch": 0.91, "learning_rate": 9.468785106724443e-05, "loss": 0.0633, "theoretical_loss": 3.3500357800763125, "tokens_seen": 2990538752 }, { "epoch": 0.91, "learning_rate": 9.460760712566202e-05, "loss": 0.0635, "theoretical_loss": 3.3500133805405965, "tokens_seen": 2990800896 }, { "epoch": 0.91, "learning_rate": 9.452736318407961e-05, "loss": 0.0636, "theoretical_loss": 3.3499909835177912, "tokens_seen": 2991063040 }, { "epoch": 0.91, "learning_rate": 9.444711924249718e-05, "loss": 0.0665, "theoretical_loss": 3.3499685890073945, "tokens_seen": 2991325184 }, { "epoch": 0.91, "learning_rate": 9.436687530091478e-05, "loss": 0.063, "theoretical_loss": 3.3499461970089044, "tokens_seen": 2991587328 }, { "epoch": 0.91, "learning_rate": 9.428663135933237e-05, "loss": 0.0637, "theoretical_loss": 3.349923807521819, "tokens_seen": 2991849472 }, { "epoch": 0.91, "learning_rate": 9.420638741774997e-05, "loss": 0.0663, "theoretical_loss": 3.3499014205456366, "tokens_seen": 2992111616 }, { "epoch": 0.91, "learning_rate": 9.412614347616755e-05, "loss": 0.0633, "theoretical_loss": 3.349879036079856, "tokens_seen": 2992373760 }, { "epoch": 0.91, "learning_rate": 9.404589953458515e-05, "loss": 0.0622, "theoretical_loss": 3.349856654123975, "tokens_seen": 2992635904 }, { "epoch": 0.91, "learning_rate": 9.396565559300272e-05, "loss": 0.0622, "theoretical_loss": 3.349834274677493, "tokens_seen": 2992898048 }, { "epoch": 0.91, "learning_rate": 9.388541165142032e-05, "loss": 0.0619, "theoretical_loss": 3.349811897739909, "tokens_seen": 2993160192 }, { "epoch": 0.91, "learning_rate": 9.380516770983791e-05, "loss": 0.0615, "theoretical_loss": 3.3497895233107218, "tokens_seen": 2993422336 }, { "epoch": 0.91, "learning_rate": 9.37249237682555e-05, "loss": 0.0623, "theoretical_loss": 3.3497671513894307, "tokens_seen": 2993684480 }, { "epoch": 0.91, "learning_rate": 9.364467982667309e-05, "loss": 0.0653, "theoretical_loss": 3.3497447819755344, "tokens_seen": 2993946624 }, { "epoch": 0.91, "learning_rate": 9.356443588509068e-05, "loss": 0.063, "theoretical_loss": 3.349722415068533, "tokens_seen": 2994208768 }, { "epoch": 0.91, "learning_rate": 9.348419194350826e-05, "loss": 0.0642, "theoretical_loss": 3.3497000506679266, "tokens_seen": 2994470912 }, { "epoch": 0.91, "learning_rate": 9.340394800192585e-05, "loss": 0.0635, "theoretical_loss": 3.349677688773214, "tokens_seen": 2994733056 }, { "epoch": 0.91, "objective/train/advantage_avg": 4.877928859059466e-06, "objective/train/docs_used": 1088743, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2250750064849854, "objective/train/original_loss": 1.2250750064849854, "objective/train/theoretical_loss": 3.3496553293838955, "objective/train/tokens_used": 3015455200, "objective/train/value_avg": -0.007190704345703125, "objective/train/value_loss": 0.0002711678680498153, "objective/train/value_max": -2.092123031616211e-05, "objective/train/value_min": -0.77490234375, "objective/train/value_reward_corr": 0.736253916918936, "objective/train/value_std": 0.01544189453125, "objective/train/weight_avg": 1.000122308731079, "objective/train/weighted_lm_loss": 1.2247551679611206, "objective/train/weights_max": 1.774009346961975, "objective/train/weights_min": 0.3681500554084778, "theoretical_loss": 3.3496553293838955, "tokens_seen": 2994995200 }, { "epoch": 0.91, "learning_rate": 9.332370406034345e-05, "loss": 0.0622, "theoretical_loss": 3.3496553293838955, "tokens_seen": 2994995200 }, { "epoch": 0.91, "learning_rate": 9.324346011876103e-05, "loss": 0.0645, "theoretical_loss": 3.349632972499471, "tokens_seen": 2995257344 }, { "epoch": 0.91, "learning_rate": 9.316321617717863e-05, "loss": 0.0602, "theoretical_loss": 3.3496106181194407, "tokens_seen": 2995519488 }, { "epoch": 0.91, "learning_rate": 9.308297223559622e-05, "loss": 0.0626, "theoretical_loss": 3.3495882662433054, "tokens_seen": 2995781632 }, { "epoch": 0.91, "learning_rate": 9.30027282940138e-05, "loss": 0.0635, "theoretical_loss": 3.349565916870565, "tokens_seen": 2996043776 }, { "epoch": 0.91, "learning_rate": 9.292248435243139e-05, "loss": 0.0654, "theoretical_loss": 3.34954357000072, "tokens_seen": 2996305920 }, { "epoch": 0.91, "learning_rate": 9.284224041084899e-05, "loss": 0.0636, "theoretical_loss": 3.3495212256332723, "tokens_seen": 2996568064 }, { "epoch": 0.91, "learning_rate": 9.276199646926657e-05, "loss": 0.0631, "theoretical_loss": 3.3494988837677213, "tokens_seen": 2996830208 }, { "epoch": 0.91, "learning_rate": 9.268175252768417e-05, "loss": 0.0637, "theoretical_loss": 3.349476544403569, "tokens_seen": 2997092352 }, { "epoch": 0.91, "learning_rate": 9.260150858610176e-05, "loss": 0.0629, "theoretical_loss": 3.349454207540316, "tokens_seen": 2997354496 }, { "epoch": 0.91, "learning_rate": 9.252126464451933e-05, "loss": 0.0642, "theoretical_loss": 3.3494318731774646, "tokens_seen": 2997616640 }, { "epoch": 0.91, "learning_rate": 9.244102070293693e-05, "loss": 0.0625, "theoretical_loss": 3.3494095413145155, "tokens_seen": 2997878784 }, { "epoch": 0.91, "learning_rate": 9.236077676135451e-05, "loss": 0.0628, "theoretical_loss": 3.3493872119509702, "tokens_seen": 2998140928 }, { "epoch": 0.91, "learning_rate": 9.228053281977211e-05, "loss": 0.0635, "theoretical_loss": 3.349364885086331, "tokens_seen": 2998403072 }, { "epoch": 0.91, "learning_rate": 9.22002888781897e-05, "loss": 0.0632, "theoretical_loss": 3.3493425607200993, "tokens_seen": 2998665216 }, { "epoch": 0.91, "learning_rate": 9.21200449366073e-05, "loss": 0.0655, "theoretical_loss": 3.349320238851777, "tokens_seen": 2998927360 }, { "epoch": 0.91, "learning_rate": 9.203980099502487e-05, "loss": 0.063, "theoretical_loss": 3.3492979194808674, "tokens_seen": 2999189504 }, { "epoch": 0.91, "learning_rate": 9.195955705344247e-05, "loss": 0.0629, "theoretical_loss": 3.349275602606872, "tokens_seen": 2999451648 }, { "epoch": 0.91, "learning_rate": 9.187931311186005e-05, "loss": 0.064, "theoretical_loss": 3.3492532882292934, "tokens_seen": 2999713792 }, { "epoch": 0.91, "learning_rate": 9.179906917027765e-05, "loss": 0.0642, "theoretical_loss": 3.3492309763476342, "tokens_seen": 2999975936 }, { "epoch": 0.91, "learning_rate": 9.171882522869524e-05, "loss": 0.0639, "theoretical_loss": 3.349208666961397, "tokens_seen": 3000238080 }, { "epoch": 0.91, "learning_rate": 9.163858128711282e-05, "loss": 0.0623, "theoretical_loss": 3.349186360070085, "tokens_seen": 3000500224 }, { "epoch": 0.91, "learning_rate": 9.155833734553041e-05, "loss": 0.0628, "theoretical_loss": 3.349164055673201, "tokens_seen": 3000762368 }, { "epoch": 0.91, "learning_rate": 9.1478093403948e-05, "loss": 0.0653, "theoretical_loss": 3.3491417537702484, "tokens_seen": 3001024512 }, { "epoch": 0.91, "learning_rate": 9.13978494623656e-05, "loss": 0.0626, "theoretical_loss": 3.3491194543607308, "tokens_seen": 3001286656 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.0006810373160988092, "objective/train/docs_used": 1091037, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2039985656738281, "objective/train/original_loss": 1.2039985656738281, "objective/train/theoretical_loss": 3.349097157444151, "objective/train/tokens_used": 3022008800, "objective/train/value_avg": -0.007076263427734375, "objective/train/value_loss": 0.00011562102008610964, "objective/train/value_max": -1.9252300262451172e-05, "objective/train/value_min": -0.258056640625, "objective/train/value_reward_corr": 0.7941427158484182, "objective/train/value_std": 0.0141754150390625, "objective/train/weight_avg": 1.000735878944397, "objective/train/weighted_lm_loss": 1.2046757936477661, "objective/train/weights_max": 1.2743442058563232, "objective/train/weights_min": 0.3687484562397003, "theoretical_loss": 3.349097157444151, "tokens_seen": 3001548800 }, { "epoch": 0.91, "learning_rate": 9.131760552078318e-05, "loss": 0.0638, "theoretical_loss": 3.349097157444151, "tokens_seen": 3001548800 }, { "epoch": 0.91, "learning_rate": 9.123736157920078e-05, "loss": 0.0656, "theoretical_loss": 3.349074863020013, "tokens_seen": 3001810944 }, { "epoch": 0.91, "learning_rate": 9.115711763761836e-05, "loss": 0.063, "theoretical_loss": 3.3490525710878205, "tokens_seen": 3002073088 }, { "epoch": 0.91, "learning_rate": 9.107687369603595e-05, "loss": 0.0639, "theoretical_loss": 3.3490302816470776, "tokens_seen": 3002335232 }, { "epoch": 0.91, "learning_rate": 9.099662975445354e-05, "loss": 0.0642, "theoretical_loss": 3.349007994697288, "tokens_seen": 3002597376 }, { "epoch": 0.91, "learning_rate": 9.091638581287113e-05, "loss": 0.0632, "theoretical_loss": 3.348985710237956, "tokens_seen": 3002859520 }, { "epoch": 0.91, "learning_rate": 9.083614187128872e-05, "loss": 0.0649, "theoretical_loss": 3.348963428268586, "tokens_seen": 3003121664 }, { "epoch": 0.91, "learning_rate": 9.075589792970632e-05, "loss": 0.0631, "theoretical_loss": 3.3489411487886827, "tokens_seen": 3003383808 }, { "epoch": 0.91, "learning_rate": 9.06756539881239e-05, "loss": 0.0628, "theoretical_loss": 3.34891887179775, "tokens_seen": 3003645952 }, { "epoch": 0.91, "learning_rate": 9.059541004654148e-05, "loss": 0.0633, "theoretical_loss": 3.348896597295293, "tokens_seen": 3003908096 }, { "epoch": 0.91, "learning_rate": 9.051516610495908e-05, "loss": 0.062, "theoretical_loss": 3.3488743252808173, "tokens_seen": 3004170240 }, { "epoch": 0.91, "learning_rate": 9.043492216337666e-05, "loss": 0.0648, "theoretical_loss": 3.3488520557538273, "tokens_seen": 3004432384 }, { "epoch": 0.91, "learning_rate": 9.035467822179426e-05, "loss": 0.0638, "theoretical_loss": 3.348829788713828, "tokens_seen": 3004694528 }, { "epoch": 0.91, "learning_rate": 9.027443428021185e-05, "loss": 0.0632, "theoretical_loss": 3.348807524160325, "tokens_seen": 3004956672 }, { "epoch": 0.91, "learning_rate": 9.019419033862944e-05, "loss": 0.0626, "theoretical_loss": 3.3487852620928233, "tokens_seen": 3005218816 }, { "epoch": 0.91, "learning_rate": 9.011394639704702e-05, "loss": 0.0616, "theoretical_loss": 3.3487630025108293, "tokens_seen": 3005480960 }, { "epoch": 0.91, "learning_rate": 9.003370245546462e-05, "loss": 0.0621, "theoretical_loss": 3.3487407454138483, "tokens_seen": 3005743104 }, { "epoch": 0.91, "learning_rate": 8.99534585138822e-05, "loss": 0.0633, "theoretical_loss": 3.348718490801386, "tokens_seen": 3006005248 }, { "epoch": 0.91, "learning_rate": 8.98732145722998e-05, "loss": 0.0655, "theoretical_loss": 3.348696238672949, "tokens_seen": 3006267392 }, { "epoch": 0.91, "learning_rate": 8.979297063071739e-05, "loss": 0.0629, "theoretical_loss": 3.3486739890280433, "tokens_seen": 3006529536 }, { "epoch": 0.91, "learning_rate": 8.971272668913497e-05, "loss": 0.0662, "theoretical_loss": 3.3486517418661745, "tokens_seen": 3006791680 }, { "epoch": 0.91, "learning_rate": 8.963248274755256e-05, "loss": 0.0653, "theoretical_loss": 3.3486294971868498, "tokens_seen": 3007053824 }, { "epoch": 0.91, "learning_rate": 8.955223880597014e-05, "loss": 0.0633, "theoretical_loss": 3.3486072549895756, "tokens_seen": 3007315968 }, { "epoch": 0.91, "learning_rate": 8.947199486438774e-05, "loss": 0.0657, "theoretical_loss": 3.3485850152738585, "tokens_seen": 3007578112 }, { "epoch": 0.91, "learning_rate": 8.939175092280533e-05, "loss": 0.0624, "theoretical_loss": 3.3485627780392058, "tokens_seen": 3007840256 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.00021146050130482763, "objective/train/docs_used": 1093533, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.1856549978256226, "objective/train/original_loss": 1.185654878616333, "objective/train/theoretical_loss": 3.348540543285124, "objective/train/tokens_used": 3028562400, "objective/train/value_avg": -0.00530242919921875, "objective/train/value_loss": 0.0002320571948075667, "objective/train/value_max": -1.1205673217773438e-05, "objective/train/value_min": -0.82958984375, "objective/train/value_reward_corr": 0.5928466282023085, "objective/train/value_std": 0.01294708251953125, "objective/train/weight_avg": 1.0003186464309692, "objective/train/weighted_lm_loss": 1.185418725013733, "objective/train/weights_max": 2.2635436058044434, "objective/train/weights_min": 0.36832353472709656, "theoretical_loss": 3.348540543285124, "tokens_seen": 3008102400 }, { "epoch": 0.91, "learning_rate": 8.931150698122293e-05, "loss": 0.0614, "theoretical_loss": 3.348540543285124, "tokens_seen": 3008102400 }, { "epoch": 0.91, "learning_rate": 8.923126303964051e-05, "loss": 0.0632, "theoretical_loss": 3.3485183110111203, "tokens_seen": 3008364544 }, { "epoch": 0.91, "learning_rate": 8.91510190980581e-05, "loss": 0.0618, "theoretical_loss": 3.348496081216702, "tokens_seen": 3008626688 }, { "epoch": 0.91, "learning_rate": 8.907077515647568e-05, "loss": 0.0663, "theoretical_loss": 3.3484738539013774, "tokens_seen": 3008888832 }, { "epoch": 0.91, "learning_rate": 8.899053121489328e-05, "loss": 0.0638, "theoretical_loss": 3.3484516290646527, "tokens_seen": 3009150976 }, { "epoch": 0.91, "learning_rate": 8.891028727331087e-05, "loss": 0.0662, "theoretical_loss": 3.3484294067060367, "tokens_seen": 3009413120 }, { "epoch": 0.91, "learning_rate": 8.883004333172845e-05, "loss": 0.0627, "theoretical_loss": 3.3484071868250367, "tokens_seen": 3009675264 }, { "epoch": 0.91, "learning_rate": 8.874979939014605e-05, "loss": 0.0647, "theoretical_loss": 3.348384969421161, "tokens_seen": 3009937408 }, { "epoch": 0.91, "learning_rate": 8.866955544856362e-05, "loss": 0.0622, "theoretical_loss": 3.3483627544939174, "tokens_seen": 3010199552 }, { "epoch": 0.91, "learning_rate": 8.858931150698122e-05, "loss": 0.0654, "theoretical_loss": 3.3483405420428145, "tokens_seen": 3010461696 }, { "epoch": 0.91, "learning_rate": 8.850906756539881e-05, "loss": 0.0628, "theoretical_loss": 3.34831833206736, "tokens_seen": 3010723840 }, { "epoch": 0.91, "learning_rate": 8.842882362381641e-05, "loss": 0.063, "theoretical_loss": 3.3482961245670637, "tokens_seen": 3010985984 }, { "epoch": 0.91, "learning_rate": 8.834857968223399e-05, "loss": 0.0628, "theoretical_loss": 3.348273919541434, "tokens_seen": 3011248128 }, { "epoch": 0.91, "learning_rate": 8.826833574065159e-05, "loss": 0.066, "theoretical_loss": 3.348251716989979, "tokens_seen": 3011510272 }, { "epoch": 0.91, "learning_rate": 8.818809179906916e-05, "loss": 0.064, "theoretical_loss": 3.3482295169122076, "tokens_seen": 3011772416 }, { "epoch": 0.91, "learning_rate": 8.810784785748676e-05, "loss": 0.0675, "theoretical_loss": 3.3482073193076296, "tokens_seen": 3012034560 }, { "epoch": 0.91, "learning_rate": 8.802760391590435e-05, "loss": 0.062, "theoretical_loss": 3.3481851241757545, "tokens_seen": 3012296704 }, { "epoch": 0.91, "learning_rate": 8.794735997432195e-05, "loss": 0.0643, "theoretical_loss": 3.3481629315160912, "tokens_seen": 3012558848 }, { "epoch": 0.91, "learning_rate": 8.786711603273953e-05, "loss": 0.0645, "theoretical_loss": 3.348140741328149, "tokens_seen": 3012820992 }, { "epoch": 0.91, "learning_rate": 8.778687209115712e-05, "loss": 0.0652, "theoretical_loss": 3.348118553611438, "tokens_seen": 3013083136 }, { "epoch": 0.91, "learning_rate": 8.770662814957472e-05, "loss": 0.0664, "theoretical_loss": 3.348096368365468, "tokens_seen": 3013345280 }, { "epoch": 0.91, "learning_rate": 8.762638420799229e-05, "loss": 0.0641, "theoretical_loss": 3.348074185589749, "tokens_seen": 3013607424 }, { "epoch": 0.91, "learning_rate": 8.754614026640989e-05, "loss": 0.0644, "theoretical_loss": 3.3480520052837908, "tokens_seen": 3013869568 }, { "epoch": 0.91, "learning_rate": 8.746589632482747e-05, "loss": 0.0629, "theoretical_loss": 3.348029827447104, "tokens_seen": 3014131712 }, { "epoch": 0.91, "learning_rate": 8.738565238324507e-05, "loss": 0.0643, "theoretical_loss": 3.3480076520791986, "tokens_seen": 3014393856 }, { "epoch": 0.91, "objective/train/advantage_avg": 6.449154170695692e-05, "objective/train/docs_used": 1095882, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3385285139083862, "objective/train/original_loss": 1.3385286331176758, "objective/train/theoretical_loss": 3.3479854791795853, "objective/train/tokens_used": 3035116000, "objective/train/value_avg": -0.0070648193359375, "objective/train/value_loss": 0.00022063912183512002, "objective/train/value_max": -1.8477439880371094e-05, "objective/train/value_min": -0.66064453125, "objective/train/value_reward_corr": 0.7004792297942277, "objective/train/value_std": 0.013946533203125, "objective/train/weight_avg": 1.0001661777496338, "objective/train/weighted_lm_loss": 1.3388984203338623, "objective/train/weights_max": 1.2268719673156738, "objective/train/weights_min": 0.3715803325176239, "theoretical_loss": 3.3479854791795853, "tokens_seen": 3014656000 }, { "epoch": 0.91, "learning_rate": 8.730540844166266e-05, "loss": 0.0638, "theoretical_loss": 3.3479854791795853, "tokens_seen": 3014656000 }, { "epoch": 0.91, "learning_rate": 8.722516450008026e-05, "loss": 0.0634, "theoretical_loss": 3.347963308747775, "tokens_seen": 3014918144 }, { "epoch": 0.91, "learning_rate": 8.714492055849783e-05, "loss": 0.0643, "theoretical_loss": 3.347941140783278, "tokens_seen": 3015180288 }, { "epoch": 0.91, "learning_rate": 8.706467661691543e-05, "loss": 0.0647, "theoretical_loss": 3.347918975285606, "tokens_seen": 3015442432 }, { "epoch": 0.91, "learning_rate": 8.698443267533301e-05, "loss": 0.0625, "theoretical_loss": 3.347896812254269, "tokens_seen": 3015704576 }, { "epoch": 0.91, "learning_rate": 8.69041887337506e-05, "loss": 0.0649, "theoretical_loss": 3.3478746516887794, "tokens_seen": 3015966720 }, { "epoch": 0.91, "learning_rate": 8.68239447921682e-05, "loss": 0.0608, "theoretical_loss": 3.347852493588648, "tokens_seen": 3016228864 }, { "epoch": 0.91, "learning_rate": 8.674370085058578e-05, "loss": 0.0628, "theoretical_loss": 3.347830337953386, "tokens_seen": 3016491008 }, { "epoch": 0.91, "learning_rate": 8.666345690900337e-05, "loss": 0.0645, "theoretical_loss": 3.3478081847825054, "tokens_seen": 3016753152 }, { "epoch": 0.91, "learning_rate": 8.658321296742095e-05, "loss": 0.066, "theoretical_loss": 3.3477860340755177, "tokens_seen": 3017015296 }, { "epoch": 0.91, "learning_rate": 8.650296902583855e-05, "loss": 0.0649, "theoretical_loss": 3.3477638858319354, "tokens_seen": 3017277440 }, { "epoch": 0.91, "learning_rate": 8.642272508425614e-05, "loss": 0.0646, "theoretical_loss": 3.34774174005127, "tokens_seen": 3017539584 }, { "epoch": 0.91, "learning_rate": 8.634248114267374e-05, "loss": 0.0628, "theoretical_loss": 3.347719596733034, "tokens_seen": 3017801728 }, { "epoch": 0.91, "learning_rate": 8.626223720109132e-05, "loss": 0.0613, "theoretical_loss": 3.3476974558767396, "tokens_seen": 3018063872 }, { "epoch": 0.91, "learning_rate": 8.618199325950891e-05, "loss": 0.0615, "theoretical_loss": 3.347675317481899, "tokens_seen": 3018326016 }, { "epoch": 0.91, "learning_rate": 8.61017493179265e-05, "loss": 0.0667, "theoretical_loss": 3.3476531815480253, "tokens_seen": 3018588160 }, { "epoch": 0.91, "learning_rate": 8.60215053763441e-05, "loss": 0.0659, "theoretical_loss": 3.347631048074631, "tokens_seen": 3018850304 }, { "epoch": 0.91, "learning_rate": 8.594126143476168e-05, "loss": 0.0634, "theoretical_loss": 3.347608917061229, "tokens_seen": 3019112448 }, { "epoch": 0.91, "learning_rate": 8.586101749317927e-05, "loss": 0.0649, "theoretical_loss": 3.347586788507332, "tokens_seen": 3019374592 }, { "epoch": 0.92, "learning_rate": 8.578077355159686e-05, "loss": 0.0614, "theoretical_loss": 3.347564662412454, "tokens_seen": 3019636736 }, { "epoch": 0.92, "learning_rate": 8.570052961001444e-05, "loss": 0.0623, "theoretical_loss": 3.347542538776108, "tokens_seen": 3019898880 }, { "epoch": 0.92, "learning_rate": 8.562028566843204e-05, "loss": 0.0651, "theoretical_loss": 3.347520417597807, "tokens_seen": 3020161024 }, { "epoch": 0.92, "learning_rate": 8.554004172684962e-05, "loss": 0.064, "theoretical_loss": 3.3474982988770647, "tokens_seen": 3020423168 }, { "epoch": 0.92, "learning_rate": 8.545979778526722e-05, "loss": 0.0625, "theoretical_loss": 3.347476182613395, "tokens_seen": 3020685312 }, { "epoch": 0.92, "learning_rate": 8.53795538436848e-05, "loss": 0.0632, "theoretical_loss": 3.3474540688063117, "tokens_seen": 3020947456 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.0008990961359813809, "objective/train/docs_used": 1098075, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.220808982849121, "objective/train/original_loss": 1.220808982849121, "objective/train/theoretical_loss": 3.347431957455329, "objective/train/tokens_used": 3041669600, "objective/train/value_avg": -0.00785064697265625, "objective/train/value_loss": 0.0002740893396548927, "objective/train/value_max": -2.3365020751953125e-05, "objective/train/value_min": -0.482177734375, "objective/train/value_reward_corr": 0.6300108547976242, "objective/train/value_std": 0.0153656005859375, "objective/train/weight_avg": 1.0010228157043457, "objective/train/weighted_lm_loss": 1.222151517868042, "objective/train/weights_max": 1.6195976734161377, "objective/train/weights_min": 0.3715973496437073, "theoretical_loss": 3.347431957455329, "tokens_seen": 3021209600 }, { "epoch": 0.92, "learning_rate": 8.52993099021024e-05, "loss": 0.0622, "theoretical_loss": 3.347431957455329, "tokens_seen": 3021209600 }, { "epoch": 0.92, "learning_rate": 8.521906596051998e-05, "loss": 0.0637, "theoretical_loss": 3.347409848559961, "tokens_seen": 3021471744 }, { "epoch": 0.92, "learning_rate": 8.513882201893758e-05, "loss": 0.0643, "theoretical_loss": 3.347387742119721, "tokens_seen": 3021733888 }, { "epoch": 0.92, "learning_rate": 8.505857807735516e-05, "loss": 0.065, "theoretical_loss": 3.347365638134125, "tokens_seen": 3021996032 }, { "epoch": 0.92, "learning_rate": 8.497833413577275e-05, "loss": 0.063, "theoretical_loss": 3.3473435366026867, "tokens_seen": 3022258176 }, { "epoch": 0.92, "learning_rate": 8.489809019419035e-05, "loss": 0.065, "theoretical_loss": 3.3473214375249207, "tokens_seen": 3022520320 }, { "epoch": 0.92, "learning_rate": 8.481784625260793e-05, "loss": 0.0613, "theoretical_loss": 3.347299340900342, "tokens_seen": 3022782464 }, { "epoch": 0.92, "learning_rate": 8.473760231102552e-05, "loss": 0.0614, "theoretical_loss": 3.3472772467284657, "tokens_seen": 3023044608 }, { "epoch": 0.92, "learning_rate": 8.46573583694431e-05, "loss": 0.0639, "theoretical_loss": 3.3472551550088063, "tokens_seen": 3023306752 }, { "epoch": 0.92, "learning_rate": 8.45771144278607e-05, "loss": 0.0658, "theoretical_loss": 3.34723306574088, "tokens_seen": 3023568896 }, { "epoch": 0.92, "learning_rate": 8.449687048627829e-05, "loss": 0.0635, "theoretical_loss": 3.3472109789242017, "tokens_seen": 3023831040 }, { "epoch": 0.92, "learning_rate": 8.441662654469589e-05, "loss": 0.065, "theoretical_loss": 3.347188894558287, "tokens_seen": 3024093184 }, { "epoch": 0.92, "learning_rate": 8.433638260311347e-05, "loss": 0.0647, "theoretical_loss": 3.347166812642651, "tokens_seen": 3024355328 }, { "epoch": 0.92, "learning_rate": 8.425613866153106e-05, "loss": 0.0649, "theoretical_loss": 3.34714473317681, "tokens_seen": 3024617472 }, { "epoch": 0.92, "learning_rate": 8.417589471994864e-05, "loss": 0.0633, "theoretical_loss": 3.34712265616028, "tokens_seen": 3024879616 }, { "epoch": 0.92, "learning_rate": 8.409565077836623e-05, "loss": 0.0628, "theoretical_loss": 3.347100581592577, "tokens_seen": 3025141760 }, { "epoch": 0.92, "learning_rate": 8.401540683678383e-05, "loss": 0.0643, "theoretical_loss": 3.347078509473217, "tokens_seen": 3025403904 }, { "epoch": 0.92, "learning_rate": 8.393516289520141e-05, "loss": 0.0642, "theoretical_loss": 3.3470564398017166, "tokens_seen": 3025666048 }, { "epoch": 0.92, "learning_rate": 8.385491895361901e-05, "loss": 0.0665, "theoretical_loss": 3.3470343725775917, "tokens_seen": 3025928192 }, { "epoch": 0.92, "learning_rate": 8.377467501203658e-05, "loss": 0.0658, "theoretical_loss": 3.34701230780036, "tokens_seen": 3026190336 }, { "epoch": 0.92, "learning_rate": 8.369443107045418e-05, "loss": 0.0622, "theoretical_loss": 3.3469902454695375, "tokens_seen": 3026452480 }, { "epoch": 0.92, "learning_rate": 8.361418712887177e-05, "loss": 0.0656, "theoretical_loss": 3.346968185584641, "tokens_seen": 3026714624 }, { "epoch": 0.92, "learning_rate": 8.353394318728937e-05, "loss": 0.0645, "theoretical_loss": 3.3469461281451878, "tokens_seen": 3026976768 }, { "epoch": 0.92, "learning_rate": 8.345369924570695e-05, "loss": 0.0662, "theoretical_loss": 3.346924073150695, "tokens_seen": 3027238912 }, { "epoch": 0.92, "learning_rate": 8.337345530412455e-05, "loss": 0.0648, "theoretical_loss": 3.34690202060068, "tokens_seen": 3027501056 }, { "epoch": 0.92, "objective/train/advantage_avg": -8.04458322818391e-05, "objective/train/docs_used": 1100401, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.253341555595398, "objective/train/original_loss": 1.2533414363861084, "objective/train/theoretical_loss": 3.34687997049466, "objective/train/tokens_used": 3048223200, "objective/train/value_avg": -0.0073089599609375, "objective/train/value_loss": 0.0001552473404444754, "objective/train/value_max": -3.4809112548828125e-05, "objective/train/value_min": -0.321533203125, "objective/train/value_reward_corr": 0.7404640557884099, "objective/train/value_std": 0.0133514404296875, "objective/train/weight_avg": 0.9999929666519165, "objective/train/weighted_lm_loss": 1.2528541088104248, "objective/train/weights_max": 1.2134668827056885, "objective/train/weights_min": 0.36880823969841003, "theoretical_loss": 3.34687997049466, "tokens_seen": 3027763200 }, { "epoch": 0.92, "learning_rate": 8.329321136254212e-05, "loss": 0.0649, "theoretical_loss": 3.34687997049466, "tokens_seen": 3027763200 }, { "epoch": 0.92, "learning_rate": 8.321296742095972e-05, "loss": 0.061, "theoretical_loss": 3.346857922832153, "tokens_seen": 3028025344 }, { "epoch": 0.92, "learning_rate": 8.313272347937731e-05, "loss": 0.0648, "theoretical_loss": 3.346835877612676, "tokens_seen": 3028287488 }, { "epoch": 0.92, "learning_rate": 8.30524795377949e-05, "loss": 0.0656, "theoretical_loss": 3.3468138348357472, "tokens_seen": 3028549632 }, { "epoch": 0.92, "learning_rate": 8.297223559621249e-05, "loss": 0.0631, "theoretical_loss": 3.346791794500885, "tokens_seen": 3028811776 }, { "epoch": 0.92, "learning_rate": 8.289199165463008e-05, "loss": 0.0648, "theoretical_loss": 3.3467697566076073, "tokens_seen": 3029073920 }, { "epoch": 0.92, "learning_rate": 8.281174771304766e-05, "loss": 0.0671, "theoretical_loss": 3.346747721155432, "tokens_seen": 3029336064 }, { "epoch": 0.92, "learning_rate": 8.273150377146525e-05, "loss": 0.064, "theoretical_loss": 3.346725688143878, "tokens_seen": 3029598208 }, { "epoch": 0.92, "learning_rate": 8.265125982988285e-05, "loss": 0.0632, "theoretical_loss": 3.3467036575724634, "tokens_seen": 3029860352 }, { "epoch": 0.92, "learning_rate": 8.257101588830043e-05, "loss": 0.0636, "theoretical_loss": 3.346681629440707, "tokens_seen": 3030122496 }, { "epoch": 0.92, "learning_rate": 8.249077194671803e-05, "loss": 0.0633, "theoretical_loss": 3.3466596037481278, "tokens_seen": 3030384640 }, { "epoch": 0.92, "learning_rate": 8.241052800513562e-05, "loss": 0.0635, "theoretical_loss": 3.346637580494244, "tokens_seen": 3030646784 }, { "epoch": 0.92, "learning_rate": 8.23302840635532e-05, "loss": 0.0659, "theoretical_loss": 3.3466155596785763, "tokens_seen": 3030908928 }, { "epoch": 0.92, "learning_rate": 8.225004012197079e-05, "loss": 0.0655, "theoretical_loss": 3.3465935413006425, "tokens_seen": 3031171072 }, { "epoch": 0.92, "learning_rate": 8.216979618038837e-05, "loss": 0.0651, "theoretical_loss": 3.3465715253599617, "tokens_seen": 3031433216 }, { "epoch": 0.92, "learning_rate": 8.208955223880597e-05, "loss": 0.064, "theoretical_loss": 3.346549511856055, "tokens_seen": 3031695360 }, { "epoch": 0.92, "learning_rate": 8.200930829722356e-05, "loss": 0.0645, "theoretical_loss": 3.3465275007884405, "tokens_seen": 3031957504 }, { "epoch": 0.92, "learning_rate": 8.192906435564116e-05, "loss": 0.0646, "theoretical_loss": 3.3465054921566386, "tokens_seen": 3032219648 }, { "epoch": 0.92, "learning_rate": 8.184882041405873e-05, "loss": 0.0637, "theoretical_loss": 3.346483485960169, "tokens_seen": 3032481792 }, { "epoch": 0.92, "learning_rate": 8.176857647247633e-05, "loss": 0.0658, "theoretical_loss": 3.3464614821985523, "tokens_seen": 3032743936 }, { "epoch": 0.92, "learning_rate": 8.168833253089391e-05, "loss": 0.0663, "theoretical_loss": 3.3464394808713083, "tokens_seen": 3033006080 }, { "epoch": 0.92, "learning_rate": 8.160808858931151e-05, "loss": 0.0643, "theoretical_loss": 3.3464174819779564, "tokens_seen": 3033268224 }, { "epoch": 0.92, "learning_rate": 8.15278446477291e-05, "loss": 0.0653, "theoretical_loss": 3.3463954855180185, "tokens_seen": 3033530368 }, { "epoch": 0.92, "learning_rate": 8.14476007061467e-05, "loss": 0.0641, "theoretical_loss": 3.346373491491015, "tokens_seen": 3033792512 }, { "epoch": 0.92, "learning_rate": 8.136735676456427e-05, "loss": 0.0671, "theoretical_loss": 3.346351499896465, "tokens_seen": 3034054656 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.0005928638274781406, "objective/train/docs_used": 1102938, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2831909656524658, "objective/train/original_loss": 1.2831907272338867, "objective/train/theoretical_loss": 3.3463295107338915, "objective/train/tokens_used": 3054776800, "objective/train/value_avg": -0.00801849365234375, "objective/train/value_loss": 0.0004268684715498239, "objective/train/value_max": -2.3186206817626953e-05, "objective/train/value_min": -0.8818359375, "objective/train/value_reward_corr": 0.6643995542878625, "objective/train/value_std": 0.0186004638671875, "objective/train/weight_avg": 1.0007858276367188, "objective/train/weighted_lm_loss": 1.2838879823684692, "objective/train/weights_max": 2.341017961502075, "objective/train/weights_min": 0.3796498477458954, "theoretical_loss": 3.3463295107338915, "tokens_seen": 3034316800 }, { "epoch": 0.92, "learning_rate": 8.128711282298187e-05, "loss": 0.0655, "theoretical_loss": 3.3463295107338915, "tokens_seen": 3034316800 }, { "epoch": 0.92, "learning_rate": 8.120686888139945e-05, "loss": 0.065, "theoretical_loss": 3.346307524002814, "tokens_seen": 3034578944 }, { "epoch": 0.92, "learning_rate": 8.112662493981704e-05, "loss": 0.0643, "theoretical_loss": 3.346285539702754, "tokens_seen": 3034841088 }, { "epoch": 0.92, "learning_rate": 8.104638099823464e-05, "loss": 0.0613, "theoretical_loss": 3.3462635578332334, "tokens_seen": 3035103232 }, { "epoch": 0.92, "learning_rate": 8.096613705665222e-05, "loss": 0.0644, "theoretical_loss": 3.3462415783937725, "tokens_seen": 3035365376 }, { "epoch": 0.92, "learning_rate": 8.088589311506981e-05, "loss": 0.0654, "theoretical_loss": 3.3462196013838934, "tokens_seen": 3035627520 }, { "epoch": 0.92, "learning_rate": 8.08056491734874e-05, "loss": 0.0649, "theoretical_loss": 3.3461976268031184, "tokens_seen": 3035889664 }, { "epoch": 0.92, "learning_rate": 8.0725405231905e-05, "loss": 0.062, "theoretical_loss": 3.3461756546509682, "tokens_seen": 3036151808 }, { "epoch": 0.92, "learning_rate": 8.064516129032258e-05, "loss": 0.0639, "theoretical_loss": 3.346153684926965, "tokens_seen": 3036413952 }, { "epoch": 0.92, "learning_rate": 8.056491734874018e-05, "loss": 0.0638, "theoretical_loss": 3.3461317176306316, "tokens_seen": 3036676096 }, { "epoch": 0.92, "learning_rate": 8.048467340715777e-05, "loss": 0.0662, "theoretical_loss": 3.3461097527614894, "tokens_seen": 3036938240 }, { "epoch": 0.92, "learning_rate": 8.040442946557535e-05, "loss": 0.0642, "theoretical_loss": 3.3460877903190607, "tokens_seen": 3037200384 }, { "epoch": 0.92, "learning_rate": 8.032418552399294e-05, "loss": 0.0627, "theoretical_loss": 3.3460658303028685, "tokens_seen": 3037462528 }, { "epoch": 0.92, "learning_rate": 8.024394158241052e-05, "loss": 0.064, "theoretical_loss": 3.3460438727124355, "tokens_seen": 3037724672 }, { "epoch": 0.92, "learning_rate": 8.016369764082812e-05, "loss": 0.0656, "theoretical_loss": 3.346021917547284, "tokens_seen": 3037986816 }, { "epoch": 0.92, "learning_rate": 8.00834536992457e-05, "loss": 0.0641, "theoretical_loss": 3.345999964806937, "tokens_seen": 3038248960 }, { "epoch": 0.92, "learning_rate": 8.00032097576633e-05, "loss": 0.0642, "theoretical_loss": 3.3459780144909175, "tokens_seen": 3038511104 }, { "epoch": 0.92, "learning_rate": 7.992296581608088e-05, "loss": 0.0605, "theoretical_loss": 3.3459560665987484, "tokens_seen": 3038773248 }, { "epoch": 0.92, "learning_rate": 7.984272187449848e-05, "loss": 0.0623, "theoretical_loss": 3.3459341211299534, "tokens_seen": 3039035392 }, { "epoch": 0.92, "learning_rate": 7.976247793291606e-05, "loss": 0.0634, "theoretical_loss": 3.345912178084056, "tokens_seen": 3039297536 }, { "epoch": 0.92, "learning_rate": 7.968223399133366e-05, "loss": 0.0653, "theoretical_loss": 3.3458902374605795, "tokens_seen": 3039559680 }, { "epoch": 0.92, "learning_rate": 7.960199004975125e-05, "loss": 0.0647, "theoretical_loss": 3.3458682992590476, "tokens_seen": 3039821824 }, { "epoch": 0.92, "learning_rate": 7.952174610816885e-05, "loss": 0.0634, "theoretical_loss": 3.345846363478984, "tokens_seen": 3040083968 }, { "epoch": 0.92, "learning_rate": 7.944150216658642e-05, "loss": 0.063, "theoretical_loss": 3.345824430119913, "tokens_seen": 3040346112 }, { "epoch": 0.92, "learning_rate": 7.9361258225004e-05, "loss": 0.0652, "theoretical_loss": 3.3458024991813584, "tokens_seen": 3040608256 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.0005279024480842054, "objective/train/docs_used": 1105174, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3808923959732056, "objective/train/original_loss": 1.3808923959732056, "objective/train/theoretical_loss": 3.3457805706628445, "objective/train/tokens_used": 3061330400, "objective/train/value_avg": -0.00731658935546875, "objective/train/value_loss": 0.00018059344438370317, "objective/train/value_max": -2.753734588623047e-05, "objective/train/value_min": -0.9140625, "objective/train/value_reward_corr": 0.7750596911751518, "objective/train/value_std": 0.016510009765625, "objective/train/weight_avg": 1.0006126165390015, "objective/train/weighted_lm_loss": 1.3814222812652588, "objective/train/weights_max": 1.484392523765564, "objective/train/weights_min": 0.4039379060268402, "theoretical_loss": 3.3457805706628445, "tokens_seen": 3040870400 }, { "epoch": 0.92, "learning_rate": 7.92810142834216e-05, "loss": 0.0651, "theoretical_loss": 3.3457805706628445, "tokens_seen": 3040870400 }, { "epoch": 0.92, "learning_rate": 7.920077034183919e-05, "loss": 0.0632, "theoretical_loss": 3.345758644563896, "tokens_seen": 3041132544 }, { "epoch": 0.92, "learning_rate": 7.912052640025679e-05, "loss": 0.0613, "theoretical_loss": 3.345736720884037, "tokens_seen": 3041394688 }, { "epoch": 0.92, "learning_rate": 7.904028245867437e-05, "loss": 0.0617, "theoretical_loss": 3.345714799622792, "tokens_seen": 3041656832 }, { "epoch": 0.92, "learning_rate": 7.896003851709197e-05, "loss": 0.064, "theoretical_loss": 3.345692880779686, "tokens_seen": 3041918976 }, { "epoch": 0.92, "learning_rate": 7.887979457550954e-05, "loss": 0.064, "theoretical_loss": 3.345670964354244, "tokens_seen": 3042181120 }, { "epoch": 0.92, "learning_rate": 7.879955063392714e-05, "loss": 0.063, "theoretical_loss": 3.3456490503459912, "tokens_seen": 3042443264 }, { "epoch": 0.92, "learning_rate": 7.871930669234473e-05, "loss": 0.0649, "theoretical_loss": 3.3456271387544523, "tokens_seen": 3042705408 }, { "epoch": 0.92, "learning_rate": 7.863906275076233e-05, "loss": 0.0649, "theoretical_loss": 3.345605229579153, "tokens_seen": 3042967552 }, { "epoch": 0.92, "learning_rate": 7.855881880917991e-05, "loss": 0.0643, "theoretical_loss": 3.345583322819618, "tokens_seen": 3043229696 }, { "epoch": 0.92, "learning_rate": 7.847857486759751e-05, "loss": 0.0643, "theoretical_loss": 3.345561418475374, "tokens_seen": 3043491840 }, { "epoch": 0.92, "learning_rate": 7.839833092601508e-05, "loss": 0.0609, "theoretical_loss": 3.3455395165459456, "tokens_seen": 3043753984 }, { "epoch": 0.92, "learning_rate": 7.831808698443267e-05, "loss": 0.0625, "theoretical_loss": 3.3455176170308594, "tokens_seen": 3044016128 }, { "epoch": 0.92, "learning_rate": 7.823784304285027e-05, "loss": 0.0638, "theoretical_loss": 3.3454957199296413, "tokens_seen": 3044278272 }, { "epoch": 0.92, "learning_rate": 7.815759910126785e-05, "loss": 0.0651, "theoretical_loss": 3.3454738252418172, "tokens_seen": 3044540416 }, { "epoch": 0.92, "learning_rate": 7.807735515968545e-05, "loss": 0.0653, "theoretical_loss": 3.3454519329669132, "tokens_seen": 3044802560 }, { "epoch": 0.92, "learning_rate": 7.799711121810304e-05, "loss": 0.0646, "theoretical_loss": 3.345430043104456, "tokens_seen": 3045064704 }, { "epoch": 0.92, "learning_rate": 7.791686727652062e-05, "loss": 0.0645, "theoretical_loss": 3.345408155653972, "tokens_seen": 3045326848 }, { "epoch": 0.92, "learning_rate": 7.783662333493821e-05, "loss": 0.0642, "theoretical_loss": 3.3453862706149877, "tokens_seen": 3045588992 }, { "epoch": 0.92, "learning_rate": 7.775637939335581e-05, "loss": 0.0654, "theoretical_loss": 3.34536438798703, "tokens_seen": 3045851136 }, { "epoch": 0.92, "learning_rate": 7.76761354517734e-05, "loss": 0.0625, "theoretical_loss": 3.345342507769626, "tokens_seen": 3046113280 }, { "epoch": 0.92, "learning_rate": 7.759589151019099e-05, "loss": 0.0636, "theoretical_loss": 3.345320629962302, "tokens_seen": 3046375424 }, { "epoch": 0.92, "learning_rate": 7.751564756860858e-05, "loss": 0.0617, "theoretical_loss": 3.345298754564586, "tokens_seen": 3046637568 }, { "epoch": 0.92, "learning_rate": 7.743540362702615e-05, "loss": 0.063, "theoretical_loss": 3.3452768815760052, "tokens_seen": 3046899712 }, { "epoch": 0.92, "learning_rate": 7.735515968544375e-05, "loss": 0.0646, "theoretical_loss": 3.3452550109960866, "tokens_seen": 3047161856 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.00029660866130143404, "objective/train/docs_used": 1107536, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.388244390487671, "objective/train/original_loss": 1.3882445096969604, "objective/train/theoretical_loss": 3.3452331428243585, "objective/train/tokens_used": 3067884000, "objective/train/value_avg": -0.007091522216796875, "objective/train/value_loss": 0.00014839263167232275, "objective/train/value_max": -1.9550323486328125e-05, "objective/train/value_min": -0.384521484375, "objective/train/value_reward_corr": 0.7271648855293683, "objective/train/value_std": 0.01367950439453125, "objective/train/weight_avg": 1.0003644227981567, "objective/train/weighted_lm_loss": 1.3882359266281128, "objective/train/weights_max": 1.3238147497177124, "objective/train/weights_min": 0.3687090575695038, "theoretical_loss": 3.3452331428243585, "tokens_seen": 3047424000 }, { "epoch": 0.92, "learning_rate": 7.727491574386133e-05, "loss": 0.0663, "theoretical_loss": 3.3452331428243585, "tokens_seen": 3047424000 }, { "epoch": 0.92, "learning_rate": 7.719467180227893e-05, "loss": 0.0628, "theoretical_loss": 3.345211277060348, "tokens_seen": 3047686144 }, { "epoch": 0.92, "learning_rate": 7.711442786069652e-05, "loss": 0.0649, "theoretical_loss": 3.345189413703583, "tokens_seen": 3047948288 }, { "epoch": 0.92, "learning_rate": 7.703418391911412e-05, "loss": 0.0653, "theoretical_loss": 3.3451675527535913, "tokens_seen": 3048210432 }, { "epoch": 0.92, "learning_rate": 7.695393997753169e-05, "loss": 0.0641, "theoretical_loss": 3.3451456942099016, "tokens_seen": 3048472576 }, { "epoch": 0.92, "learning_rate": 7.687369603594929e-05, "loss": 0.0649, "theoretical_loss": 3.3451238380720416, "tokens_seen": 3048734720 }, { "epoch": 0.92, "learning_rate": 7.679345209436687e-05, "loss": 0.063, "theoretical_loss": 3.34510198433954, "tokens_seen": 3048996864 }, { "epoch": 0.92, "learning_rate": 7.671320815278447e-05, "loss": 0.0633, "theoretical_loss": 3.3450801330119253, "tokens_seen": 3049259008 }, { "epoch": 0.92, "learning_rate": 7.663296421120206e-05, "loss": 0.0621, "theoretical_loss": 3.3450582840887257, "tokens_seen": 3049521152 }, { "epoch": 0.92, "learning_rate": 7.655272026961964e-05, "loss": 0.0647, "theoretical_loss": 3.3450364375694708, "tokens_seen": 3049783296 }, { "epoch": 0.92, "learning_rate": 7.647247632803723e-05, "loss": 0.0626, "theoretical_loss": 3.345014593453689, "tokens_seen": 3050045440 }, { "epoch": 0.92, "learning_rate": 7.639223238645482e-05, "loss": 0.0657, "theoretical_loss": 3.344992751740909, "tokens_seen": 3050307584 }, { "epoch": 0.92, "learning_rate": 7.631198844487241e-05, "loss": 0.0631, "theoretical_loss": 3.344970912430661, "tokens_seen": 3050569728 }, { "epoch": 0.92, "learning_rate": 7.623174450329e-05, "loss": 0.0647, "theoretical_loss": 3.344949075522473, "tokens_seen": 3050831872 }, { "epoch": 0.92, "learning_rate": 7.61515005617076e-05, "loss": 0.0633, "theoretical_loss": 3.3449272410158755, "tokens_seen": 3051094016 }, { "epoch": 0.92, "learning_rate": 7.607125662012518e-05, "loss": 0.0671, "theoretical_loss": 3.3449054089103973, "tokens_seen": 3051356160 }, { "epoch": 0.92, "learning_rate": 7.599101267854277e-05, "loss": 0.0624, "theoretical_loss": 3.3448835792055687, "tokens_seen": 3051618304 }, { "epoch": 0.92, "learning_rate": 7.591076873696036e-05, "loss": 0.0626, "theoretical_loss": 3.344861751900919, "tokens_seen": 3051880448 }, { "epoch": 0.92, "learning_rate": 7.583052479537795e-05, "loss": 0.0651, "theoretical_loss": 3.344839926995979, "tokens_seen": 3052142592 }, { "epoch": 0.93, "learning_rate": 7.575028085379554e-05, "loss": 0.0638, "theoretical_loss": 3.3448181044902783, "tokens_seen": 3052404736 }, { "epoch": 0.93, "learning_rate": 7.567003691221314e-05, "loss": 0.0663, "theoretical_loss": 3.3447962843833468, "tokens_seen": 3052666880 }, { "epoch": 0.93, "learning_rate": 7.558979297063073e-05, "loss": 0.0633, "theoretical_loss": 3.344774466674715, "tokens_seen": 3052929024 }, { "epoch": 0.93, "learning_rate": 7.55095490290483e-05, "loss": 0.0652, "theoretical_loss": 3.3447526513639136, "tokens_seen": 3053191168 }, { "epoch": 0.93, "learning_rate": 7.54293050874659e-05, "loss": 0.0636, "theoretical_loss": 3.3447308384504737, "tokens_seen": 3053453312 }, { "epoch": 0.93, "learning_rate": 7.534906114588348e-05, "loss": 0.0634, "theoretical_loss": 3.3447090279339253, "tokens_seen": 3053715456 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.0005677227163687348, "objective/train/docs_used": 1109702, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2256311178207397, "objective/train/original_loss": 1.2256312370300293, "objective/train/theoretical_loss": 3.344687219813799, "objective/train/tokens_used": 3074437600, "objective/train/value_avg": -0.0052642822265625, "objective/train/value_loss": 0.00013756824773736298, "objective/train/value_max": -3.2961368560791016e-05, "objective/train/value_min": -0.323486328125, "objective/train/value_reward_corr": 0.6535918841042918, "objective/train/value_std": 0.01059722900390625, "objective/train/weight_avg": 1.000628113746643, "objective/train/weighted_lm_loss": 1.226385474205017, "objective/train/weights_max": 1.242395043373108, "objective/train/weights_min": 0.3693855106830597, "theoretical_loss": 3.344687219813799, "tokens_seen": 3053977600 }, { "epoch": 0.93, "learning_rate": 7.526881720430108e-05, "loss": 0.0632, "theoretical_loss": 3.344687219813799, "tokens_seen": 3053977600 }, { "epoch": 0.93, "learning_rate": 7.518857326271867e-05, "loss": 0.0643, "theoretical_loss": 3.344665414089627, "tokens_seen": 3054239744 }, { "epoch": 0.93, "learning_rate": 7.510832932113627e-05, "loss": 0.0641, "theoretical_loss": 3.3446436107609396, "tokens_seen": 3054501888 }, { "epoch": 0.93, "learning_rate": 7.502808537955384e-05, "loss": 0.0638, "theoretical_loss": 3.3446218098272684, "tokens_seen": 3054764032 }, { "epoch": 0.93, "learning_rate": 7.494784143797144e-05, "loss": 0.0654, "theoretical_loss": 3.344600011288145, "tokens_seen": 3055026176 }, { "epoch": 0.93, "learning_rate": 7.486759749638902e-05, "loss": 0.0669, "theoretical_loss": 3.3445782151431005, "tokens_seen": 3055288320 }, { "epoch": 0.93, "learning_rate": 7.478735355480662e-05, "loss": 0.0636, "theoretical_loss": 3.3445564213916668, "tokens_seen": 3055550464 }, { "epoch": 0.93, "learning_rate": 7.47071096132242e-05, "loss": 0.0644, "theoretical_loss": 3.3445346300333756, "tokens_seen": 3055812608 }, { "epoch": 0.93, "learning_rate": 7.462686567164179e-05, "loss": 0.0649, "theoretical_loss": 3.344512841067759, "tokens_seen": 3056074752 }, { "epoch": 0.93, "learning_rate": 7.454662173005938e-05, "loss": 0.0615, "theoretical_loss": 3.344491054494349, "tokens_seen": 3056336896 }, { "epoch": 0.93, "learning_rate": 7.446637778847696e-05, "loss": 0.064, "theoretical_loss": 3.3444692703126777, "tokens_seen": 3056599040 }, { "epoch": 0.93, "learning_rate": 7.438613384689456e-05, "loss": 0.0635, "theoretical_loss": 3.3444474885222784, "tokens_seen": 3056861184 }, { "epoch": 0.93, "learning_rate": 7.430588990531215e-05, "loss": 0.0626, "theoretical_loss": 3.3444257091226817, "tokens_seen": 3057123328 }, { "epoch": 0.93, "learning_rate": 7.422564596372975e-05, "loss": 0.0647, "theoretical_loss": 3.344403932113422, "tokens_seen": 3057385472 }, { "epoch": 0.93, "learning_rate": 7.414540202214733e-05, "loss": 0.0638, "theoretical_loss": 3.344382157494031, "tokens_seen": 3057647616 }, { "epoch": 0.93, "learning_rate": 7.406515808056492e-05, "loss": 0.0642, "theoretical_loss": 3.344360385264042, "tokens_seen": 3057909760 }, { "epoch": 0.93, "learning_rate": 7.39849141389825e-05, "loss": 0.0649, "theoretical_loss": 3.3443386154229877, "tokens_seen": 3058171904 }, { "epoch": 0.93, "learning_rate": 7.39046701974001e-05, "loss": 0.0642, "theoretical_loss": 3.344316847970402, "tokens_seen": 3058434048 }, { "epoch": 0.93, "learning_rate": 7.382442625581769e-05, "loss": 0.0634, "theoretical_loss": 3.344295082905817, "tokens_seen": 3058696192 }, { "epoch": 0.93, "learning_rate": 7.374418231423529e-05, "loss": 0.0656, "theoretical_loss": 3.344273320228767, "tokens_seen": 3058958336 }, { "epoch": 0.93, "learning_rate": 7.366393837265287e-05, "loss": 0.0608, "theoretical_loss": 3.344251559938785, "tokens_seen": 3059220480 }, { "epoch": 0.93, "learning_rate": 7.358369443107044e-05, "loss": 0.0643, "theoretical_loss": 3.3442298020354047, "tokens_seen": 3059482624 }, { "epoch": 0.93, "learning_rate": 7.350345048948804e-05, "loss": 0.0641, "theoretical_loss": 3.3442080465181605, "tokens_seen": 3059744768 }, { "epoch": 0.93, "learning_rate": 7.342320654790563e-05, "loss": 0.064, "theoretical_loss": 3.3441862933865854, "tokens_seen": 3060006912 }, { "epoch": 0.93, "learning_rate": 7.334296260632323e-05, "loss": 0.0615, "theoretical_loss": 3.3441645426402142, "tokens_seen": 3060269056 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.0004281561414245516, "objective/train/docs_used": 1112043, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2398734092712402, "objective/train/original_loss": 1.2398734092712402, "objective/train/theoretical_loss": 3.344142794278581, "objective/train/tokens_used": 3080991200, "objective/train/value_avg": -0.00775909423828125, "objective/train/value_loss": 0.00013446260709315538, "objective/train/value_max": -3.2961368560791016e-05, "objective/train/value_min": -0.2418212890625, "objective/train/value_reward_corr": 0.7998507260566405, "objective/train/value_std": 0.015167236328125, "objective/train/weight_avg": 1.0004942417144775, "objective/train/weighted_lm_loss": 1.2405970096588135, "objective/train/weights_max": 1.2352036237716675, "objective/train/weights_min": 0.6098437309265137, "theoretical_loss": 3.344142794278581, "tokens_seen": 3060531200 }, { "epoch": 0.93, "learning_rate": 7.326271866474081e-05, "loss": 0.0621, "theoretical_loss": 3.344142794278581, "tokens_seen": 3060531200 }, { "epoch": 0.93, "learning_rate": 7.318247472315841e-05, "loss": 0.0633, "theoretical_loss": 3.34412104830122, "tokens_seen": 3060793344 }, { "epoch": 0.93, "learning_rate": 7.310223078157598e-05, "loss": 0.062, "theoretical_loss": 3.344099304707665, "tokens_seen": 3061055488 }, { "epoch": 0.93, "learning_rate": 7.302198683999358e-05, "loss": 0.062, "theoretical_loss": 3.3440775634974513, "tokens_seen": 3061317632 }, { "epoch": 0.93, "learning_rate": 7.294174289841117e-05, "loss": 0.0631, "theoretical_loss": 3.3440558246701135, "tokens_seen": 3061579776 }, { "epoch": 0.93, "learning_rate": 7.286149895682877e-05, "loss": 0.0644, "theoretical_loss": 3.3440340882251864, "tokens_seen": 3061841920 }, { "epoch": 0.93, "learning_rate": 7.278125501524635e-05, "loss": 0.0654, "theoretical_loss": 3.344012354162205, "tokens_seen": 3062104064 }, { "epoch": 0.93, "learning_rate": 7.270101107366394e-05, "loss": 0.063, "theoretical_loss": 3.3439906224807046, "tokens_seen": 3062366208 }, { "epoch": 0.93, "learning_rate": 7.262076713208152e-05, "loss": 0.0645, "theoretical_loss": 3.34396889318022, "tokens_seen": 3062628352 }, { "epoch": 0.93, "learning_rate": 7.254052319049911e-05, "loss": 0.0643, "theoretical_loss": 3.3439471662602864, "tokens_seen": 3062890496 }, { "epoch": 0.93, "learning_rate": 7.246027924891671e-05, "loss": 0.0649, "theoretical_loss": 3.34392544172044, "tokens_seen": 3063152640 }, { "epoch": 0.93, "learning_rate": 7.23800353073343e-05, "loss": 0.0636, "theoretical_loss": 3.3439037195602155, "tokens_seen": 3063414784 }, { "epoch": 0.93, "learning_rate": 7.22997913657519e-05, "loss": 0.0648, "theoretical_loss": 3.3438819997791493, "tokens_seen": 3063676928 }, { "epoch": 0.93, "learning_rate": 7.221954742416948e-05, "loss": 0.0645, "theoretical_loss": 3.343860282376778, "tokens_seen": 3063939072 }, { "epoch": 0.93, "learning_rate": 7.213930348258706e-05, "loss": 0.0614, "theoretical_loss": 3.343838567352636, "tokens_seen": 3064201216 }, { "epoch": 0.93, "learning_rate": 7.205905954100465e-05, "loss": 0.0619, "theoretical_loss": 3.34381685470626, "tokens_seen": 3064463360 }, { "epoch": 0.93, "learning_rate": 7.197881559942225e-05, "loss": 0.0661, "theoretical_loss": 3.3437951444371867, "tokens_seen": 3064725504 }, { "epoch": 0.93, "learning_rate": 7.189857165783983e-05, "loss": 0.0647, "theoretical_loss": 3.3437734365449527, "tokens_seen": 3064987648 }, { "epoch": 0.93, "learning_rate": 7.181832771625742e-05, "loss": 0.0651, "theoretical_loss": 3.3437517310290934, "tokens_seen": 3065249792 }, { "epoch": 0.93, "learning_rate": 7.173808377467502e-05, "loss": 0.0626, "theoretical_loss": 3.343730027889147, "tokens_seen": 3065511936 }, { "epoch": 0.93, "learning_rate": 7.165783983309259e-05, "loss": 0.0634, "theoretical_loss": 3.3437083271246486, "tokens_seen": 3065774080 }, { "epoch": 0.93, "learning_rate": 7.157759589151019e-05, "loss": 0.0624, "theoretical_loss": 3.343686628735137, "tokens_seen": 3066036224 }, { "epoch": 0.93, "learning_rate": 7.149735194992778e-05, "loss": 0.0623, "theoretical_loss": 3.3436649327201473, "tokens_seen": 3066298368 }, { "epoch": 0.93, "learning_rate": 7.141710800834537e-05, "loss": 0.0674, "theoretical_loss": 3.343643239079218, "tokens_seen": 3066560512 }, { "epoch": 0.93, "learning_rate": 7.133686406676296e-05, "loss": 0.0615, "theoretical_loss": 3.343621547811886, "tokens_seen": 3066822656 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.0005850589368492365, "objective/train/docs_used": 1114524, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3618371486663818, "objective/train/original_loss": 1.3618371486663818, "objective/train/theoretical_loss": 3.3435998589176883, "objective/train/tokens_used": 3087544800, "objective/train/value_avg": -0.007190704345703125, "objective/train/value_loss": 0.00024140205641742796, "objective/train/value_max": -2.586841583251953e-05, "objective/train/value_min": -0.97705078125, "objective/train/value_reward_corr": 0.685630743050589, "objective/train/value_std": 0.01404571533203125, "objective/train/weight_avg": 1.0006887912750244, "objective/train/weighted_lm_loss": 1.3621577024459839, "objective/train/weights_max": 1.189415454864502, "objective/train/weights_min": 0.2359674870967865, "theoretical_loss": 3.3435998589176883, "tokens_seen": 3067084800 }, { "epoch": 0.93, "learning_rate": 7.125662012518056e-05, "loss": 0.0651, "theoretical_loss": 3.3435998589176883, "tokens_seen": 3067084800 }, { "epoch": 0.93, "learning_rate": 7.117637618359813e-05, "loss": 0.061, "theoretical_loss": 3.343578172396163, "tokens_seen": 3067346944 }, { "epoch": 0.93, "learning_rate": 7.109613224201573e-05, "loss": 0.0642, "theoretical_loss": 3.3435564882468483, "tokens_seen": 3067609088 }, { "epoch": 0.93, "learning_rate": 7.101588830043332e-05, "loss": 0.0623, "theoretical_loss": 3.343534806469281, "tokens_seen": 3067871232 }, { "epoch": 0.93, "learning_rate": 7.093564435885091e-05, "loss": 0.0656, "theoretical_loss": 3.343513127063, "tokens_seen": 3068133376 }, { "epoch": 0.93, "learning_rate": 7.08554004172685e-05, "loss": 0.0648, "theoretical_loss": 3.343491450027542, "tokens_seen": 3068395520 }, { "epoch": 0.93, "learning_rate": 7.077515647568609e-05, "loss": 0.0652, "theoretical_loss": 3.343469775362447, "tokens_seen": 3068657664 }, { "epoch": 0.93, "learning_rate": 7.069491253410367e-05, "loss": 0.0628, "theoretical_loss": 3.3434481030672516, "tokens_seen": 3068919808 }, { "epoch": 0.93, "learning_rate": 7.061466859252126e-05, "loss": 0.0629, "theoretical_loss": 3.343426433141496, "tokens_seen": 3069181952 }, { "epoch": 0.93, "learning_rate": 7.053442465093886e-05, "loss": 0.0615, "theoretical_loss": 3.343404765584717, "tokens_seen": 3069444096 }, { "epoch": 0.93, "learning_rate": 7.045418070935644e-05, "loss": 0.0636, "theoretical_loss": 3.343383100396455, "tokens_seen": 3069706240 }, { "epoch": 0.93, "learning_rate": 7.037393676777404e-05, "loss": 0.0631, "theoretical_loss": 3.343361437576248, "tokens_seen": 3069968384 }, { "epoch": 0.93, "learning_rate": 7.029369282619163e-05, "loss": 0.0652, "theoretical_loss": 3.343339777123635, "tokens_seen": 3070230528 }, { "epoch": 0.93, "learning_rate": 7.021344888460923e-05, "loss": 0.0651, "theoretical_loss": 3.343318119038155, "tokens_seen": 3070492672 }, { "epoch": 0.93, "learning_rate": 7.01332049430268e-05, "loss": 0.0662, "theoretical_loss": 3.343296463319348, "tokens_seen": 3070754816 }, { "epoch": 0.93, "learning_rate": 7.00529610014444e-05, "loss": 0.0646, "theoretical_loss": 3.3432748099667524, "tokens_seen": 3071016960 }, { "epoch": 0.93, "learning_rate": 6.997271705986198e-05, "loss": 0.0616, "theoretical_loss": 3.343253158979908, "tokens_seen": 3071279104 }, { "epoch": 0.93, "learning_rate": 6.989247311827957e-05, "loss": 0.0642, "theoretical_loss": 3.343231510358355, "tokens_seen": 3071541248 }, { "epoch": 0.93, "learning_rate": 6.981222917669717e-05, "loss": 0.0637, "theoretical_loss": 3.343209864101633, "tokens_seen": 3071803392 }, { "epoch": 0.93, "learning_rate": 6.973198523511474e-05, "loss": 0.066, "theoretical_loss": 3.343188220209281, "tokens_seen": 3072065536 }, { "epoch": 0.93, "learning_rate": 6.965174129353234e-05, "loss": 0.0635, "theoretical_loss": 3.34316657868084, "tokens_seen": 3072327680 }, { "epoch": 0.93, "learning_rate": 6.957149735194992e-05, "loss": 0.0643, "theoretical_loss": 3.34314493951585, "tokens_seen": 3072589824 }, { "epoch": 0.93, "learning_rate": 6.949125341036752e-05, "loss": 0.0656, "theoretical_loss": 3.343123302713851, "tokens_seen": 3072851968 }, { "epoch": 0.93, "learning_rate": 6.941100946878511e-05, "loss": 0.0658, "theoretical_loss": 3.3431016682743833, "tokens_seen": 3073114112 }, { "epoch": 0.93, "learning_rate": 6.93307655272027e-05, "loss": 0.0645, "theoretical_loss": 3.343080036196988, "tokens_seen": 3073376256 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.0006747455336153507, "objective/train/docs_used": 1116766, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.1027328968048096, "objective/train/original_loss": 1.1027328968048096, "objective/train/theoretical_loss": 3.3430584064812052, "objective/train/tokens_used": 3094098400, "objective/train/value_avg": -0.0059967041015625, "objective/train/value_loss": 0.0001406289666192606, "objective/train/value_max": -2.6047229766845703e-05, "objective/train/value_min": -0.935546875, "objective/train/value_reward_corr": 0.7394594899401773, "objective/train/value_std": 0.01275634765625, "objective/train/weight_avg": 1.000738263130188, "objective/train/weighted_lm_loss": 1.1030726432800293, "objective/train/weights_max": 1.1870945692062378, "objective/train/weights_min": 0.3722897469997406, "theoretical_loss": 3.3430584064812052, "tokens_seen": 3073638400 }, { "epoch": 0.93, "learning_rate": 6.925052158562029e-05, "loss": 0.0608, "theoretical_loss": 3.3430584064812052, "tokens_seen": 3073638400 }, { "epoch": 0.93, "learning_rate": 6.917027764403788e-05, "loss": 0.0622, "theoretical_loss": 3.3430367791265763, "tokens_seen": 3073900544 }, { "epoch": 0.93, "learning_rate": 6.909003370245546e-05, "loss": 0.0625, "theoretical_loss": 3.3430151541326416, "tokens_seen": 3074162688 }, { "epoch": 0.93, "learning_rate": 6.900978976087306e-05, "loss": 0.0659, "theoretical_loss": 3.3429935314989425, "tokens_seen": 3074424832 }, { "epoch": 0.93, "learning_rate": 6.892954581929065e-05, "loss": 0.0612, "theoretical_loss": 3.34297191122502, "tokens_seen": 3074686976 }, { "epoch": 0.93, "learning_rate": 6.884930187770823e-05, "loss": 0.063, "theoretical_loss": 3.342950293310415, "tokens_seen": 3074949120 }, { "epoch": 0.93, "learning_rate": 6.876905793612583e-05, "loss": 0.0628, "theoretical_loss": 3.3429286777546703, "tokens_seen": 3075211264 }, { "epoch": 0.93, "learning_rate": 6.86888139945434e-05, "loss": 0.0623, "theoretical_loss": 3.342907064557326, "tokens_seen": 3075473408 }, { "epoch": 0.93, "learning_rate": 6.8608570052961e-05, "loss": 0.0662, "theoretical_loss": 3.3428854537179244, "tokens_seen": 3075735552 }, { "epoch": 0.93, "learning_rate": 6.852832611137859e-05, "loss": 0.0607, "theoretical_loss": 3.3428638452360078, "tokens_seen": 3075997696 }, { "epoch": 0.93, "learning_rate": 6.844808216979619e-05, "loss": 0.063, "theoretical_loss": 3.342842239111117, "tokens_seen": 3076259840 }, { "epoch": 0.93, "learning_rate": 6.836783822821377e-05, "loss": 0.0608, "theoretical_loss": 3.342820635342795, "tokens_seen": 3076521984 }, { "epoch": 0.93, "learning_rate": 6.828759428663137e-05, "loss": 0.0637, "theoretical_loss": 3.342799033930584, "tokens_seen": 3076784128 }, { "epoch": 0.93, "learning_rate": 6.820735034504894e-05, "loss": 0.0644, "theoretical_loss": 3.3427774348740256, "tokens_seen": 3077046272 }, { "epoch": 0.93, "learning_rate": 6.812710640346654e-05, "loss": 0.0637, "theoretical_loss": 3.342755838172663, "tokens_seen": 3077308416 }, { "epoch": 0.93, "learning_rate": 6.804686246188413e-05, "loss": 0.0658, "theoretical_loss": 3.3427342438260386, "tokens_seen": 3077570560 }, { "epoch": 0.93, "learning_rate": 6.796661852030171e-05, "loss": 0.0658, "theoretical_loss": 3.342712651833695, "tokens_seen": 3077832704 }, { "epoch": 0.93, "learning_rate": 6.788637457871931e-05, "loss": 0.0648, "theoretical_loss": 3.342691062195175, "tokens_seen": 3078094848 }, { "epoch": 0.93, "learning_rate": 6.78061306371369e-05, "loss": 0.066, "theoretical_loss": 3.3426694749100214, "tokens_seen": 3078356992 }, { "epoch": 0.93, "learning_rate": 6.772588669555448e-05, "loss": 0.0617, "theoretical_loss": 3.342647889977778, "tokens_seen": 3078619136 }, { "epoch": 0.93, "learning_rate": 6.764564275397207e-05, "loss": 0.0653, "theoretical_loss": 3.342626307397987, "tokens_seen": 3078881280 }, { "epoch": 0.93, "learning_rate": 6.756539881238967e-05, "loss": 0.064, "theoretical_loss": 3.342604727170193, "tokens_seen": 3079143424 }, { "epoch": 0.93, "learning_rate": 6.748515487080725e-05, "loss": 0.0615, "theoretical_loss": 3.3425831492939384, "tokens_seen": 3079405568 }, { "epoch": 0.93, "learning_rate": 6.740491092922485e-05, "loss": 0.0629, "theoretical_loss": 3.3425615737687675, "tokens_seen": 3079667712 }, { "epoch": 0.93, "learning_rate": 6.732466698764244e-05, "loss": 0.0633, "theoretical_loss": 3.342540000594224, "tokens_seen": 3079929856 }, { "epoch": 0.93, "objective/train/advantage_avg": -0.00011591265501920134, "objective/train/docs_used": 1119127, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.1800016164779663, "objective/train/original_loss": 1.1800014972686768, "objective/train/theoretical_loss": 3.342518429769851, "objective/train/tokens_used": 3100652000, "objective/train/value_avg": -0.0055389404296875, "objective/train/value_loss": 0.00014903591363690794, "objective/train/value_max": -2.3365020751953125e-05, "objective/train/value_min": -0.31640625, "objective/train/value_reward_corr": 0.6550612583763221, "objective/train/value_std": 0.0103302001953125, "objective/train/weight_avg": 0.9999504685401917, "objective/train/weighted_lm_loss": 1.1799626350402832, "objective/train/weights_max": 1.2208958864212036, "objective/train/weights_min": 0.37342190742492676, "theoretical_loss": 3.342518429769851, "tokens_seen": 3080192000 }, { "epoch": 0.93, "learning_rate": 6.724442304606002e-05, "loss": 0.0611, "theoretical_loss": 3.342518429769851, "tokens_seen": 3080192000 }, { "epoch": 0.93, "learning_rate": 6.716417910447761e-05, "loss": 0.0642, "theoretical_loss": 3.342496861295193, "tokens_seen": 3080454144 }, { "epoch": 0.93, "learning_rate": 6.70839351628952e-05, "loss": 0.0656, "theoretical_loss": 3.342475295169794, "tokens_seen": 3080716288 }, { "epoch": 0.93, "learning_rate": 6.70036912213128e-05, "loss": 0.0607, "theoretical_loss": 3.342453731393199, "tokens_seen": 3080978432 }, { "epoch": 0.93, "learning_rate": 6.692344727973038e-05, "loss": 0.0644, "theoretical_loss": 3.342432169964951, "tokens_seen": 3081240576 }, { "epoch": 0.93, "learning_rate": 6.684320333814798e-05, "loss": 0.06, "theoretical_loss": 3.3424106108845955, "tokens_seen": 3081502720 }, { "epoch": 0.93, "learning_rate": 6.676295939656555e-05, "loss": 0.0656, "theoretical_loss": 3.342389054151677, "tokens_seen": 3081764864 }, { "epoch": 0.93, "learning_rate": 6.668271545498315e-05, "loss": 0.0608, "theoretical_loss": 3.34236749976574, "tokens_seen": 3082027008 }, { "epoch": 0.93, "learning_rate": 6.660247151340074e-05, "loss": 0.0635, "theoretical_loss": 3.34234594772633, "tokens_seen": 3082289152 }, { "epoch": 0.93, "learning_rate": 6.652222757181833e-05, "loss": 0.061, "theoretical_loss": 3.342324398032991, "tokens_seen": 3082551296 }, { "epoch": 0.93, "learning_rate": 6.644198363023592e-05, "loss": 0.0616, "theoretical_loss": 3.342302850685269, "tokens_seen": 3082813440 }, { "epoch": 0.93, "learning_rate": 6.636173968865352e-05, "loss": 0.0635, "theoretical_loss": 3.3422813056827088, "tokens_seen": 3083075584 }, { "epoch": 0.93, "learning_rate": 6.628149574707109e-05, "loss": 0.0657, "theoretical_loss": 3.342259763024856, "tokens_seen": 3083337728 }, { "epoch": 0.93, "learning_rate": 6.620125180548869e-05, "loss": 0.0625, "theoretical_loss": 3.3422382227112557, "tokens_seen": 3083599872 }, { "epoch": 0.93, "learning_rate": 6.612100786390628e-05, "loss": 0.0611, "theoretical_loss": 3.3422166847414543, "tokens_seen": 3083862016 }, { "epoch": 0.93, "learning_rate": 6.604076392232386e-05, "loss": 0.0633, "theoretical_loss": 3.3421951491149966, "tokens_seen": 3084124160 }, { "epoch": 0.93, "learning_rate": 6.596051998074146e-05, "loss": 0.0644, "theoretical_loss": 3.3421736158314292, "tokens_seen": 3084386304 }, { "epoch": 0.93, "learning_rate": 6.588027603915905e-05, "loss": 0.0631, "theoretical_loss": 3.3421520848902984, "tokens_seen": 3084648448 }, { "epoch": 0.93, "learning_rate": 6.580003209757663e-05, "loss": 0.0624, "theoretical_loss": 3.3421305562911496, "tokens_seen": 3084910592 }, { "epoch": 0.93, "learning_rate": 6.571978815599422e-05, "loss": 0.0622, "theoretical_loss": 3.342109030033529, "tokens_seen": 3085172736 }, { "epoch": 0.94, "learning_rate": 6.563954421441182e-05, "loss": 0.0641, "theoretical_loss": 3.342087506116984, "tokens_seen": 3085434880 }, { "epoch": 0.94, "learning_rate": 6.55593002728294e-05, "loss": 0.0642, "theoretical_loss": 3.3420659845410605, "tokens_seen": 3085697024 }, { "epoch": 0.94, "learning_rate": 6.5479056331247e-05, "loss": 0.0639, "theoretical_loss": 3.342044465305305, "tokens_seen": 3085959168 }, { "epoch": 0.94, "learning_rate": 6.539881238966459e-05, "loss": 0.0646, "theoretical_loss": 3.3420229484092645, "tokens_seen": 3086221312 }, { "epoch": 0.94, "learning_rate": 6.531856844808217e-05, "loss": 0.0659, "theoretical_loss": 3.3420014338524857, "tokens_seen": 3086483456 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.0005402964889071882, "objective/train/docs_used": 1121586, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2228138446807861, "objective/train/original_loss": 1.2228137254714966, "objective/train/theoretical_loss": 3.3419799216345156, "objective/train/tokens_used": 3107205600, "objective/train/value_avg": -0.005706787109375, "objective/train/value_loss": 0.0002355128963245079, "objective/train/value_max": -4.2319297790527344e-05, "objective/train/value_min": -0.6748046875, "objective/train/value_reward_corr": 0.653781937380487, "objective/train/value_std": 0.01299285888671875, "objective/train/weight_avg": 1.0006425380706787, "objective/train/weighted_lm_loss": 1.223292350769043, "objective/train/weights_max": 1.7227903604507446, "objective/train/weights_min": 0.3708694577217102, "theoretical_loss": 3.3419799216345156, "tokens_seen": 3086745600 }, { "epoch": 0.94, "learning_rate": 6.523832450649976e-05, "loss": 0.0629, "theoretical_loss": 3.3419799216345156, "tokens_seen": 3086745600 }, { "epoch": 0.94, "learning_rate": 6.515808056491734e-05, "loss": 0.0639, "theoretical_loss": 3.341958411754902, "tokens_seen": 3087007744 }, { "epoch": 0.94, "learning_rate": 6.507783662333494e-05, "loss": 0.0628, "theoretical_loss": 3.3419369042131915, "tokens_seen": 3087269888 }, { "epoch": 0.94, "learning_rate": 6.499759268175253e-05, "loss": 0.066, "theoretical_loss": 3.341915399008932, "tokens_seen": 3087532032 }, { "epoch": 0.94, "learning_rate": 6.491734874017013e-05, "loss": 0.0644, "theoretical_loss": 3.3418938961416704, "tokens_seen": 3087794176 }, { "epoch": 0.94, "learning_rate": 6.48371047985877e-05, "loss": 0.0645, "theoretical_loss": 3.341872395610955, "tokens_seen": 3088056320 }, { "epoch": 0.94, "learning_rate": 6.47568608570053e-05, "loss": 0.0632, "theoretical_loss": 3.3418508974163332, "tokens_seen": 3088318464 }, { "epoch": 0.94, "learning_rate": 6.467661691542288e-05, "loss": 0.064, "theoretical_loss": 3.3418294015573533, "tokens_seen": 3088580608 }, { "epoch": 0.94, "learning_rate": 6.459637297384048e-05, "loss": 0.0614, "theoretical_loss": 3.3418079080335628, "tokens_seen": 3088842752 }, { "epoch": 0.94, "learning_rate": 6.451612903225807e-05, "loss": 0.0639, "theoretical_loss": 3.34178641684451, "tokens_seen": 3089104896 }, { "epoch": 0.94, "learning_rate": 6.443588509067567e-05, "loss": 0.0651, "theoretical_loss": 3.3417649279897437, "tokens_seen": 3089367040 }, { "epoch": 0.94, "learning_rate": 6.435564114909324e-05, "loss": 0.0608, "theoretical_loss": 3.341743441468812, "tokens_seen": 3089629184 }, { "epoch": 0.94, "learning_rate": 6.427539720751084e-05, "loss": 0.0637, "theoretical_loss": 3.3417219572812633, "tokens_seen": 3089891328 }, { "epoch": 0.94, "learning_rate": 6.419515326592842e-05, "loss": 0.0642, "theoretical_loss": 3.3417004754266464, "tokens_seen": 3090153472 }, { "epoch": 0.94, "learning_rate": 6.411490932434601e-05, "loss": 0.0632, "theoretical_loss": 3.34167899590451, "tokens_seen": 3090415616 }, { "epoch": 0.94, "learning_rate": 6.403466538276361e-05, "loss": 0.063, "theoretical_loss": 3.3416575187144035, "tokens_seen": 3090677760 }, { "epoch": 0.94, "learning_rate": 6.395442144118119e-05, "loss": 0.0649, "theoretical_loss": 3.341636043855875, "tokens_seen": 3090939904 }, { "epoch": 0.94, "learning_rate": 6.387417749959878e-05, "loss": 0.0637, "theoretical_loss": 3.3416145713284746, "tokens_seen": 3091202048 }, { "epoch": 0.94, "learning_rate": 6.379393355801636e-05, "loss": 0.0669, "theoretical_loss": 3.341593101131751, "tokens_seen": 3091464192 }, { "epoch": 0.94, "learning_rate": 6.371368961643396e-05, "loss": 0.0662, "theoretical_loss": 3.341571633265254, "tokens_seen": 3091726336 }, { "epoch": 0.94, "learning_rate": 6.363344567485155e-05, "loss": 0.0674, "theoretical_loss": 3.341550167728533, "tokens_seen": 3091988480 }, { "epoch": 0.94, "learning_rate": 6.355320173326915e-05, "loss": 0.0629, "theoretical_loss": 3.3415287045211377, "tokens_seen": 3092250624 }, { "epoch": 0.94, "learning_rate": 6.347295779168673e-05, "loss": 0.0643, "theoretical_loss": 3.3415072436426176, "tokens_seen": 3092512768 }, { "epoch": 0.94, "learning_rate": 6.339271385010432e-05, "loss": 0.0656, "theoretical_loss": 3.341485785092523, "tokens_seen": 3092774912 }, { "epoch": 0.94, "learning_rate": 6.33124699085219e-05, "loss": 0.0635, "theoretical_loss": 3.341464328870404, "tokens_seen": 3093037056 }, { "epoch": 0.94, "objective/train/advantage_avg": -8.85079352883622e-05, "objective/train/docs_used": 1123676, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.302232027053833, "objective/train/original_loss": 1.302232027053833, "objective/train/theoretical_loss": 3.3414428749758107, "objective/train/tokens_used": 3113759200, "objective/train/value_avg": -0.005947113037109375, "objective/train/value_loss": 0.00013723468873649836, "objective/train/value_max": -1.6987323760986328e-05, "objective/train/value_min": -0.43994140625, "objective/train/value_reward_corr": 0.8200767261251052, "objective/train/value_std": 0.01557159423828125, "objective/train/weight_avg": 0.999978244304657, "objective/train/weighted_lm_loss": 1.302291989326477, "objective/train/weights_max": 1.2921568155288696, "objective/train/weights_min": 0.6140877604484558, "theoretical_loss": 3.3414428749758107, "tokens_seen": 3093299200 }, { "epoch": 0.94, "learning_rate": 6.323222596693949e-05, "loss": 0.064, "theoretical_loss": 3.3414428749758107, "tokens_seen": 3093299200 }, { "epoch": 0.94, "learning_rate": 6.315198202535709e-05, "loss": 0.0666, "theoretical_loss": 3.341421423408293, "tokens_seen": 3093561344 }, { "epoch": 0.94, "learning_rate": 6.307173808377467e-05, "loss": 0.0663, "theoretical_loss": 3.341399974167402, "tokens_seen": 3093823488 }, { "epoch": 0.94, "learning_rate": 6.299149414219227e-05, "loss": 0.0646, "theoretical_loss": 3.3413785272526875, "tokens_seen": 3094085632 }, { "epoch": 0.94, "learning_rate": 6.291125020060985e-05, "loss": 0.0632, "theoretical_loss": 3.341357082663701, "tokens_seen": 3094347776 }, { "epoch": 0.94, "learning_rate": 6.283100625902744e-05, "loss": 0.064, "theoretical_loss": 3.3413356403999925, "tokens_seen": 3094609920 }, { "epoch": 0.94, "learning_rate": 6.275076231744503e-05, "loss": 0.0689, "theoretical_loss": 3.341314200461113, "tokens_seen": 3094872064 }, { "epoch": 0.94, "learning_rate": 6.267051837586263e-05, "loss": 0.0647, "theoretical_loss": 3.3412927628466145, "tokens_seen": 3095134208 }, { "epoch": 0.94, "learning_rate": 6.259027443428021e-05, "loss": 0.0628, "theoretical_loss": 3.341271327556047, "tokens_seen": 3095396352 }, { "epoch": 0.94, "learning_rate": 6.251003049269781e-05, "loss": 0.0632, "theoretical_loss": 3.341249894588963, "tokens_seen": 3095658496 }, { "epoch": 0.94, "learning_rate": 6.242978655111539e-05, "loss": 0.0634, "theoretical_loss": 3.3412284639449124, "tokens_seen": 3095920640 }, { "epoch": 0.94, "learning_rate": 6.234954260953298e-05, "loss": 0.0627, "theoretical_loss": 3.341207035623448, "tokens_seen": 3096182784 }, { "epoch": 0.94, "learning_rate": 6.226929866795057e-05, "loss": 0.0633, "theoretical_loss": 3.341185609624121, "tokens_seen": 3096444928 }, { "epoch": 0.94, "learning_rate": 6.218905472636816e-05, "loss": 0.0633, "theoretical_loss": 3.341164185946483, "tokens_seen": 3096707072 }, { "epoch": 0.94, "learning_rate": 6.210881078478575e-05, "loss": 0.0639, "theoretical_loss": 3.3411427645900864, "tokens_seen": 3096969216 }, { "epoch": 0.94, "learning_rate": 6.202856684320334e-05, "loss": 0.066, "theoretical_loss": 3.341121345554483, "tokens_seen": 3097231360 }, { "epoch": 0.94, "learning_rate": 6.194832290162093e-05, "loss": 0.0649, "theoretical_loss": 3.341099928839225, "tokens_seen": 3097493504 }, { "epoch": 0.94, "learning_rate": 6.186807896003851e-05, "loss": 0.0635, "theoretical_loss": 3.3410785144438644, "tokens_seen": 3097755648 }, { "epoch": 0.94, "learning_rate": 6.178783501845611e-05, "loss": 0.0635, "theoretical_loss": 3.3410571023679543, "tokens_seen": 3098017792 }, { "epoch": 0.94, "learning_rate": 6.17075910768737e-05, "loss": 0.0642, "theoretical_loss": 3.3410356926110465, "tokens_seen": 3098279936 }, { "epoch": 0.94, "learning_rate": 6.162734713529128e-05, "loss": 0.0635, "theoretical_loss": 3.3410142851726943, "tokens_seen": 3098542080 }, { "epoch": 0.94, "learning_rate": 6.154710319370888e-05, "loss": 0.0644, "theoretical_loss": 3.3409928800524495, "tokens_seen": 3098804224 }, { "epoch": 0.94, "learning_rate": 6.146685925212647e-05, "loss": 0.065, "theoretical_loss": 3.3409714772498664, "tokens_seen": 3099066368 }, { "epoch": 0.94, "learning_rate": 6.138661531054405e-05, "loss": 0.0615, "theoretical_loss": 3.3409500767644973, "tokens_seen": 3099328512 }, { "epoch": 0.94, "learning_rate": 6.130637136896165e-05, "loss": 0.0627, "theoretical_loss": 3.340928678595895, "tokens_seen": 3099590656 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.0005747837130911648, "objective/train/docs_used": 1126162, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2767504453659058, "objective/train/original_loss": 1.2767504453659058, "objective/train/theoretical_loss": 3.3409072827436135, "objective/train/tokens_used": 3120312800, "objective/train/value_avg": -0.00778961181640625, "objective/train/value_loss": 0.000372168084140867, "objective/train/value_max": -2.7298927307128906e-05, "objective/train/value_min": -0.73974609375, "objective/train/value_reward_corr": 0.6550666805696803, "objective/train/value_std": 0.01641845703125, "objective/train/weight_avg": 1.0007363557815552, "objective/train/weighted_lm_loss": 1.2775542736053467, "objective/train/weights_max": 2.0023789405822754, "objective/train/weights_min": 0.3681640923023224, "theoretical_loss": 3.3409072827436135, "tokens_seen": 3099852800 }, { "epoch": 0.94, "learning_rate": 6.122612742737924e-05, "loss": 0.0638, "theoretical_loss": 3.3409072827436135, "tokens_seen": 3099852800 }, { "epoch": 0.94, "learning_rate": 6.114588348579682e-05, "loss": 0.0659, "theoretical_loss": 3.3408858892072058, "tokens_seen": 3100114944 }, { "epoch": 0.94, "learning_rate": 6.106563954421442e-05, "loss": 0.065, "theoretical_loss": 3.3408644979862254, "tokens_seen": 3100377088 }, { "epoch": 0.94, "learning_rate": 6.0985395602632006e-05, "loss": 0.0635, "theoretical_loss": 3.340843109080226, "tokens_seen": 3100639232 }, { "epoch": 0.94, "learning_rate": 6.090515166104959e-05, "loss": 0.0638, "theoretical_loss": 3.340821722488762, "tokens_seen": 3100901376 }, { "epoch": 0.94, "learning_rate": 6.082490771946718e-05, "loss": 0.0636, "theoretical_loss": 3.3408003382113862, "tokens_seen": 3101163520 }, { "epoch": 0.94, "learning_rate": 6.074466377788477e-05, "loss": 0.0633, "theoretical_loss": 3.3407789562476538, "tokens_seen": 3101425664 }, { "epoch": 0.94, "learning_rate": 6.066441983630236e-05, "loss": 0.0652, "theoretical_loss": 3.3407575765971176, "tokens_seen": 3101687808 }, { "epoch": 0.94, "learning_rate": 6.058417589471995e-05, "loss": 0.0662, "theoretical_loss": 3.340736199259333, "tokens_seen": 3101949952 }, { "epoch": 0.94, "learning_rate": 6.050393195313754e-05, "loss": 0.0626, "theoretical_loss": 3.3407148242338542, "tokens_seen": 3102212096 }, { "epoch": 0.94, "learning_rate": 6.042368801155513e-05, "loss": 0.0655, "theoretical_loss": 3.3406934515202353, "tokens_seen": 3102474240 }, { "epoch": 0.94, "learning_rate": 6.034344406997272e-05, "loss": 0.067, "theoretical_loss": 3.3406720811180315, "tokens_seen": 3102736384 }, { "epoch": 0.94, "learning_rate": 6.026320012839031e-05, "loss": 0.0625, "theoretical_loss": 3.340650713026797, "tokens_seen": 3102998528 }, { "epoch": 0.94, "learning_rate": 6.01829561868079e-05, "loss": 0.0642, "theoretical_loss": 3.340629347246087, "tokens_seen": 3103260672 }, { "epoch": 0.94, "learning_rate": 6.010271224522549e-05, "loss": 0.0637, "theoretical_loss": 3.3406079837754565, "tokens_seen": 3103522816 }, { "epoch": 0.94, "learning_rate": 6.002246830364308e-05, "loss": 0.0645, "theoretical_loss": 3.3405866226144605, "tokens_seen": 3103784960 }, { "epoch": 0.94, "learning_rate": 5.9942224362060665e-05, "loss": 0.0654, "theoretical_loss": 3.3405652637626546, "tokens_seen": 3104047104 }, { "epoch": 0.94, "learning_rate": 5.986198042047825e-05, "loss": 0.0643, "theoretical_loss": 3.340543907219594, "tokens_seen": 3104309248 }, { "epoch": 0.94, "learning_rate": 5.978173647889584e-05, "loss": 0.0651, "theoretical_loss": 3.340522552984834, "tokens_seen": 3104571392 }, { "epoch": 0.94, "learning_rate": 5.9701492537313435e-05, "loss": 0.0632, "theoretical_loss": 3.3405012010579305, "tokens_seen": 3104833536 }, { "epoch": 0.94, "learning_rate": 5.962124859573102e-05, "loss": 0.0629, "theoretical_loss": 3.340479851438439, "tokens_seen": 3105095680 }, { "epoch": 0.94, "learning_rate": 5.954100465414861e-05, "loss": 0.0645, "theoretical_loss": 3.340458504125916, "tokens_seen": 3105357824 }, { "epoch": 0.94, "learning_rate": 5.9460760712566205e-05, "loss": 0.0639, "theoretical_loss": 3.3404371591199165, "tokens_seen": 3105619968 }, { "epoch": 0.94, "learning_rate": 5.938051677098379e-05, "loss": 0.0643, "theoretical_loss": 3.3404158164199975, "tokens_seen": 3105882112 }, { "epoch": 0.94, "learning_rate": 5.930027282940138e-05, "loss": 0.0685, "theoretical_loss": 3.3403944760257147, "tokens_seen": 3106144256 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.00033568739308975637, "objective/train/docs_used": 1128625, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2322754859924316, "objective/train/original_loss": 1.2322752475738525, "objective/train/theoretical_loss": 3.340373137936625, "objective/train/tokens_used": 3126866400, "objective/train/value_avg": -0.0088043212890625, "objective/train/value_loss": 0.0002217359869973734, "objective/train/value_max": -3.5643577575683594e-05, "objective/train/value_min": -0.6103515625, "objective/train/value_reward_corr": 0.7731854039618011, "objective/train/value_std": 0.0171966552734375, "objective/train/weight_avg": 1.0004370212554932, "objective/train/weighted_lm_loss": 1.23274827003479, "objective/train/weights_max": 1.7944713830947876, "objective/train/weights_min": 0.3681126534938812, "theoretical_loss": 3.340373137936625, "tokens_seen": 3106406400 }, { "epoch": 0.94, "learning_rate": 5.9220028887818975e-05, "loss": 0.0646, "theoretical_loss": 3.340373137936625, "tokens_seen": 3106406400 }, { "epoch": 0.94, "learning_rate": 5.913978494623656e-05, "loss": 0.0635, "theoretical_loss": 3.340351802152284, "tokens_seen": 3106668544 }, { "epoch": 0.94, "learning_rate": 5.9059541004654146e-05, "loss": 0.0635, "theoretical_loss": 3.3403304686722497, "tokens_seen": 3106930688 }, { "epoch": 0.94, "learning_rate": 5.897929706307174e-05, "loss": 0.0623, "theoretical_loss": 3.3403091374960776, "tokens_seen": 3107192832 }, { "epoch": 0.94, "learning_rate": 5.8899053121489324e-05, "loss": 0.066, "theoretical_loss": 3.340287808623325, "tokens_seen": 3107454976 }, { "epoch": 0.94, "learning_rate": 5.8818809179906916e-05, "loss": 0.0652, "theoretical_loss": 3.340266482053549, "tokens_seen": 3107717120 }, { "epoch": 0.94, "learning_rate": 5.873856523832451e-05, "loss": 0.0637, "theoretical_loss": 3.3402451577863066, "tokens_seen": 3107979264 }, { "epoch": 0.94, "learning_rate": 5.8658321296742094e-05, "loss": 0.0643, "theoretical_loss": 3.340223835821155, "tokens_seen": 3108241408 }, { "epoch": 0.94, "learning_rate": 5.8578077355159686e-05, "loss": 0.0628, "theoretical_loss": 3.3402025161576514, "tokens_seen": 3108503552 }, { "epoch": 0.94, "learning_rate": 5.849783341357728e-05, "loss": 0.0619, "theoretical_loss": 3.340181198795354, "tokens_seen": 3108765696 }, { "epoch": 0.94, "learning_rate": 5.8417589471994864e-05, "loss": 0.0638, "theoretical_loss": 3.340159883733819, "tokens_seen": 3109027840 }, { "epoch": 0.94, "learning_rate": 5.8337345530412456e-05, "loss": 0.0654, "theoretical_loss": 3.3401385709726052, "tokens_seen": 3109289984 }, { "epoch": 0.94, "learning_rate": 5.825710158883005e-05, "loss": 0.0634, "theoretical_loss": 3.3401172605112706, "tokens_seen": 3109552128 }, { "epoch": 0.94, "learning_rate": 5.8176857647247634e-05, "loss": 0.0682, "theoretical_loss": 3.3400959523493725, "tokens_seen": 3109814272 }, { "epoch": 0.94, "learning_rate": 5.809661370566522e-05, "loss": 0.0649, "theoretical_loss": 3.3400746464864692, "tokens_seen": 3110076416 }, { "epoch": 0.94, "learning_rate": 5.801636976408281e-05, "loss": 0.0657, "theoretical_loss": 3.340053342922119, "tokens_seen": 3110338560 }, { "epoch": 0.94, "learning_rate": 5.79361258225004e-05, "loss": 0.0645, "theoretical_loss": 3.3400320416558804, "tokens_seen": 3110600704 }, { "epoch": 0.94, "learning_rate": 5.785588188091799e-05, "loss": 0.0649, "theoretical_loss": 3.340010742687311, "tokens_seen": 3110862848 }, { "epoch": 0.94, "learning_rate": 5.777563793933558e-05, "loss": 0.0655, "theoretical_loss": 3.339989446015971, "tokens_seen": 3111124992 }, { "epoch": 0.94, "learning_rate": 5.769539399775317e-05, "loss": 0.0653, "theoretical_loss": 3.3399681516414175, "tokens_seen": 3111387136 }, { "epoch": 0.94, "learning_rate": 5.761515005617076e-05, "loss": 0.066, "theoretical_loss": 3.33994685956321, "tokens_seen": 3111649280 }, { "epoch": 0.94, "learning_rate": 5.753490611458835e-05, "loss": 0.0683, "theoretical_loss": 3.3399255697809074, "tokens_seen": 3111911424 }, { "epoch": 0.94, "learning_rate": 5.745466217300594e-05, "loss": 0.0675, "theoretical_loss": 3.3399042822940688, "tokens_seen": 3112173568 }, { "epoch": 0.94, "learning_rate": 5.737441823142353e-05, "loss": 0.0649, "theoretical_loss": 3.339882997102253, "tokens_seen": 3112435712 }, { "epoch": 0.94, "learning_rate": 5.729417428984112e-05, "loss": 0.0651, "theoretical_loss": 3.3398617142050195, "tokens_seen": 3112697856 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.0003075933491345495, "objective/train/docs_used": 1130876, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2553244829177856, "objective/train/original_loss": 1.2553246021270752, "objective/train/theoretical_loss": 3.3398404336019283, "objective/train/tokens_used": 3133420000, "objective/train/value_avg": -0.0066986083984375, "objective/train/value_loss": 0.00021400822151917964, "objective/train/value_max": -1.9848346710205078e-05, "objective/train/value_min": -0.6826171875, "objective/train/value_reward_corr": 0.7202309173108771, "objective/train/value_std": 0.0164794921875, "objective/train/weight_avg": 1.0004074573516846, "objective/train/weighted_lm_loss": 1.2557744979858398, "objective/train/weights_max": 1.742195725440979, "objective/train/weights_min": 0.3772936761379242, "theoretical_loss": 3.3398404336019283, "tokens_seen": 3112960000 }, { "epoch": 0.94, "learning_rate": 5.7213930348258714e-05, "loss": 0.065, "theoretical_loss": 3.3398404336019283, "tokens_seen": 3112960000 }, { "epoch": 0.94, "learning_rate": 5.713368640667629e-05, "loss": 0.0647, "theoretical_loss": 3.3398191552925383, "tokens_seen": 3113222144 }, { "epoch": 0.94, "learning_rate": 5.7053442465093885e-05, "loss": 0.0631, "theoretical_loss": 3.33979787927641, "tokens_seen": 3113484288 }, { "epoch": 0.94, "learning_rate": 5.697319852351147e-05, "loss": 0.0624, "theoretical_loss": 3.3397766055531015, "tokens_seen": 3113746432 }, { "epoch": 0.94, "learning_rate": 5.689295458192906e-05, "loss": 0.0654, "theoretical_loss": 3.3397553341221746, "tokens_seen": 3114008576 }, { "epoch": 0.94, "learning_rate": 5.6812710640346655e-05, "loss": 0.0617, "theoretical_loss": 3.3397340649831877, "tokens_seen": 3114270720 }, { "epoch": 0.94, "learning_rate": 5.673246669876425e-05, "loss": 0.0619, "theoretical_loss": 3.3397127981357024, "tokens_seen": 3114532864 }, { "epoch": 0.94, "learning_rate": 5.665222275718183e-05, "loss": 0.065, "theoretical_loss": 3.339691533579278, "tokens_seen": 3114795008 }, { "epoch": 0.94, "learning_rate": 5.6571978815599425e-05, "loss": 0.0644, "theoretical_loss": 3.3396702713134756, "tokens_seen": 3115057152 }, { "epoch": 0.94, "learning_rate": 5.649173487401702e-05, "loss": 0.0661, "theoretical_loss": 3.3396490113378547, "tokens_seen": 3115319296 }, { "epoch": 0.94, "learning_rate": 5.64114909324346e-05, "loss": 0.0644, "theoretical_loss": 3.3396277536519774, "tokens_seen": 3115581440 }, { "epoch": 0.94, "learning_rate": 5.6331246990852196e-05, "loss": 0.0644, "theoretical_loss": 3.339606498255403, "tokens_seen": 3115843584 }, { "epoch": 0.94, "learning_rate": 5.625100304926979e-05, "loss": 0.064, "theoretical_loss": 3.3395852451476937, "tokens_seen": 3116105728 }, { "epoch": 0.94, "learning_rate": 5.6170759107687367e-05, "loss": 0.0647, "theoretical_loss": 3.339563994328409, "tokens_seen": 3116367872 }, { "epoch": 0.94, "learning_rate": 5.609051516610496e-05, "loss": 0.0652, "theoretical_loss": 3.339542745797112, "tokens_seen": 3116630016 }, { "epoch": 0.94, "learning_rate": 5.601027122452255e-05, "loss": 0.0674, "theoretical_loss": 3.3395214995533617, "tokens_seen": 3116892160 }, { "epoch": 0.94, "learning_rate": 5.5930027282940137e-05, "loss": 0.0674, "theoretical_loss": 3.339500255596721, "tokens_seen": 3117154304 }, { "epoch": 0.94, "learning_rate": 5.584978334135773e-05, "loss": 0.0703, "theoretical_loss": 3.339479013926751, "tokens_seen": 3117416448 }, { "epoch": 0.94, "learning_rate": 5.576953939977532e-05, "loss": 0.0663, "theoretical_loss": 3.3394577745430136, "tokens_seen": 3117678592 }, { "epoch": 0.94, "learning_rate": 5.568929545819291e-05, "loss": 0.0692, "theoretical_loss": 3.33943653744507, "tokens_seen": 3117940736 }, { "epoch": 0.94, "learning_rate": 5.56090515166105e-05, "loss": 0.067, "theoretical_loss": 3.339415302632482, "tokens_seen": 3118202880 }, { "epoch": 0.95, "learning_rate": 5.552880757502809e-05, "loss": 0.0691, "theoretical_loss": 3.339394070104812, "tokens_seen": 3118465024 }, { "epoch": 0.95, "learning_rate": 5.544856363344568e-05, "loss": 0.0655, "theoretical_loss": 3.339372839861622, "tokens_seen": 3118727168 }, { "epoch": 0.95, "learning_rate": 5.536831969186327e-05, "loss": 0.066, "theoretical_loss": 3.339351611902474, "tokens_seen": 3118989312 }, { "epoch": 0.95, "learning_rate": 5.5288075750280855e-05, "loss": 0.0638, "theoretical_loss": 3.339330386226931, "tokens_seen": 3119251456 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.0004081982187926769, "objective/train/docs_used": 1133291, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.438138484954834, "objective/train/original_loss": 1.438138484954834, "objective/train/theoretical_loss": 3.3393091628345544, "objective/train/tokens_used": 3139973600, "objective/train/value_avg": -0.00905609130859375, "objective/train/value_loss": 0.00029880873626098037, "objective/train/value_max": -2.014636993408203e-05, "objective/train/value_min": -0.681640625, "objective/train/value_reward_corr": 0.6956209491124092, "objective/train/value_std": 0.0166778564453125, "objective/train/weight_avg": 1.0005412101745605, "objective/train/weighted_lm_loss": 1.4387654066085815, "objective/train/weights_max": 1.4040422439575195, "objective/train/weights_min": 0.37822747230529785, "theoretical_loss": 3.3393091628345544, "tokens_seen": 3119513600 }, { "epoch": 0.95, "learning_rate": 5.520783180869844e-05, "loss": 0.0672, "theoretical_loss": 3.3393091628345544, "tokens_seen": 3119513600 }, { "epoch": 0.95, "learning_rate": 5.512758786711603e-05, "loss": 0.063, "theoretical_loss": 3.3392879417249075, "tokens_seen": 3119775744 }, { "epoch": 0.95, "learning_rate": 5.5047343925533625e-05, "loss": 0.0697, "theoretical_loss": 3.3392667228975528, "tokens_seen": 3120037888 }, { "epoch": 0.95, "learning_rate": 5.496709998395121e-05, "loss": 0.0652, "theoretical_loss": 3.339245506352053, "tokens_seen": 3120300032 }, { "epoch": 0.95, "learning_rate": 5.48868560423688e-05, "loss": 0.0685, "theoretical_loss": 3.339224292087972, "tokens_seen": 3120562176 }, { "epoch": 0.95, "learning_rate": 5.4806612100786395e-05, "loss": 0.0675, "theoretical_loss": 3.339203080104871, "tokens_seen": 3120824320 }, { "epoch": 0.95, "learning_rate": 5.472636815920398e-05, "loss": 0.0648, "theoretical_loss": 3.3391818704023146, "tokens_seen": 3121086464 }, { "epoch": 0.95, "learning_rate": 5.464612421762157e-05, "loss": 0.0657, "theoretical_loss": 3.339160662979866, "tokens_seen": 3121348608 }, { "epoch": 0.95, "learning_rate": 5.4565880276039165e-05, "loss": 0.0631, "theoretical_loss": 3.3391394578370877, "tokens_seen": 3121610752 }, { "epoch": 0.95, "learning_rate": 5.448563633445675e-05, "loss": 0.0661, "theoretical_loss": 3.3391182549735445, "tokens_seen": 3121872896 }, { "epoch": 0.95, "learning_rate": 5.440539239287434e-05, "loss": 0.0651, "theoretical_loss": 3.339097054388799, "tokens_seen": 3122135040 }, { "epoch": 0.95, "learning_rate": 5.432514845129193e-05, "loss": 0.0674, "theoretical_loss": 3.3390758560824154, "tokens_seen": 3122397184 }, { "epoch": 0.95, "learning_rate": 5.4244904509709514e-05, "loss": 0.0644, "theoretical_loss": 3.3390546600539577, "tokens_seen": 3122659328 }, { "epoch": 0.95, "learning_rate": 5.4164660568127106e-05, "loss": 0.0672, "theoretical_loss": 3.33903346630299, "tokens_seen": 3122921472 }, { "epoch": 0.95, "learning_rate": 5.40844166265447e-05, "loss": 0.0648, "theoretical_loss": 3.3390122748290763, "tokens_seen": 3123183616 }, { "epoch": 0.95, "learning_rate": 5.4004172684962284e-05, "loss": 0.064, "theoretical_loss": 3.338991085631781, "tokens_seen": 3123445760 }, { "epoch": 0.95, "learning_rate": 5.3923928743379876e-05, "loss": 0.0649, "theoretical_loss": 3.3389698987106673, "tokens_seen": 3123707904 }, { "epoch": 0.95, "learning_rate": 5.384368480179747e-05, "loss": 0.0679, "theoretical_loss": 3.338948714065302, "tokens_seen": 3123970048 }, { "epoch": 0.95, "learning_rate": 5.3763440860215054e-05, "loss": 0.0659, "theoretical_loss": 3.3389275316952474, "tokens_seen": 3124232192 }, { "epoch": 0.95, "learning_rate": 5.3683196918632646e-05, "loss": 0.066, "theoretical_loss": 3.3389063516000697, "tokens_seen": 3124494336 }, { "epoch": 0.95, "learning_rate": 5.360295297705024e-05, "loss": 0.0677, "theoretical_loss": 3.3388851737793335, "tokens_seen": 3124756480 }, { "epoch": 0.95, "learning_rate": 5.3522709035467824e-05, "loss": 0.0637, "theoretical_loss": 3.338863998232603, "tokens_seen": 3125018624 }, { "epoch": 0.95, "learning_rate": 5.3442465093885416e-05, "loss": 0.0669, "theoretical_loss": 3.338842824959445, "tokens_seen": 3125280768 }, { "epoch": 0.95, "learning_rate": 5.3362221152303e-05, "loss": 0.0652, "theoretical_loss": 3.338821653959423, "tokens_seen": 3125542912 }, { "epoch": 0.95, "learning_rate": 5.328197721072059e-05, "loss": 0.0659, "theoretical_loss": 3.3388004852321025, "tokens_seen": 3125805056 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.0002322028885828331, "objective/train/docs_used": 1135598, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3199049234390259, "objective/train/original_loss": 1.3199048042297363, "objective/train/theoretical_loss": 3.33877931877705, "objective/train/tokens_used": 3146527200, "objective/train/value_avg": -0.010223388671875, "objective/train/value_loss": 0.0003747669979929924, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.9521484375, "objective/train/value_reward_corr": 0.7227508381490007, "objective/train/value_std": 0.0213623046875, "objective/train/weight_avg": 1.0004043579101562, "objective/train/weighted_lm_loss": 1.3192325830459595, "objective/train/weights_max": 2.4473495483398438, "objective/train/weights_min": 0.36831334233283997, "theoretical_loss": 3.33877931877705, "tokens_seen": 3126067200 }, { "epoch": 0.95, "learning_rate": 5.320173326913818e-05, "loss": 0.0645, "theoretical_loss": 3.33877931877705, "tokens_seen": 3126067200 }, { "epoch": 0.95, "learning_rate": 5.312148932755577e-05, "loss": 0.0671, "theoretical_loss": 3.3387581545938305, "tokens_seen": 3126329344 }, { "epoch": 0.95, "learning_rate": 5.304124538597336e-05, "loss": 0.0645, "theoretical_loss": 3.3387369926820094, "tokens_seen": 3126591488 }, { "epoch": 0.95, "learning_rate": 5.296100144439095e-05, "loss": 0.0671, "theoretical_loss": 3.338715833041153, "tokens_seen": 3126853632 }, { "epoch": 0.95, "learning_rate": 5.288075750280854e-05, "loss": 0.0637, "theoretical_loss": 3.338694675670827, "tokens_seen": 3127115776 }, { "epoch": 0.95, "learning_rate": 5.280051356122613e-05, "loss": 0.0676, "theoretical_loss": 3.3386735205705977, "tokens_seen": 3127377920 }, { "epoch": 0.95, "learning_rate": 5.272026961964372e-05, "loss": 0.0655, "theoretical_loss": 3.338652367740031, "tokens_seen": 3127640064 }, { "epoch": 0.95, "learning_rate": 5.264002567806131e-05, "loss": 0.0673, "theoretical_loss": 3.338631217178693, "tokens_seen": 3127902208 }, { "epoch": 0.95, "learning_rate": 5.25597817364789e-05, "loss": 0.0621, "theoretical_loss": 3.3386100688861506, "tokens_seen": 3128164352 }, { "epoch": 0.95, "learning_rate": 5.247953779489649e-05, "loss": 0.0621, "theoretical_loss": 3.33858892286197, "tokens_seen": 3128426496 }, { "epoch": 0.95, "learning_rate": 5.2399293853314075e-05, "loss": 0.0649, "theoretical_loss": 3.338567779105718, "tokens_seen": 3128688640 }, { "epoch": 0.95, "learning_rate": 5.231904991173166e-05, "loss": 0.0651, "theoretical_loss": 3.3385466376169615, "tokens_seen": 3128950784 }, { "epoch": 0.95, "learning_rate": 5.223880597014925e-05, "loss": 0.0646, "theoretical_loss": 3.338525498395267, "tokens_seen": 3129212928 }, { "epoch": 0.95, "learning_rate": 5.2158562028566845e-05, "loss": 0.0636, "theoretical_loss": 3.338504361440202, "tokens_seen": 3129475072 }, { "epoch": 0.95, "learning_rate": 5.207831808698443e-05, "loss": 0.0636, "theoretical_loss": 3.3384832267513334, "tokens_seen": 3129737216 }, { "epoch": 0.95, "learning_rate": 5.199807414540202e-05, "loss": 0.0682, "theoretical_loss": 3.338462094328228, "tokens_seen": 3129999360 }, { "epoch": 0.95, "learning_rate": 5.1917830203819615e-05, "loss": 0.0659, "theoretical_loss": 3.338440964170454, "tokens_seen": 3130261504 }, { "epoch": 0.95, "learning_rate": 5.18375862622372e-05, "loss": 0.0672, "theoretical_loss": 3.338419836277578, "tokens_seen": 3130523648 }, { "epoch": 0.95, "learning_rate": 5.175734232065479e-05, "loss": 0.0653, "theoretical_loss": 3.338398710649168, "tokens_seen": 3130785792 }, { "epoch": 0.95, "learning_rate": 5.1677098379072385e-05, "loss": 0.0631, "theoretical_loss": 3.3383775872847923, "tokens_seen": 3131047936 }, { "epoch": 0.95, "learning_rate": 5.159685443748997e-05, "loss": 0.0672, "theoretical_loss": 3.3383564661840177, "tokens_seen": 3131310080 }, { "epoch": 0.95, "learning_rate": 5.151661049590756e-05, "loss": 0.062, "theoretical_loss": 3.3383353473464132, "tokens_seen": 3131572224 }, { "epoch": 0.95, "learning_rate": 5.143636655432515e-05, "loss": 0.0648, "theoretical_loss": 3.3383142307715463, "tokens_seen": 3131834368 }, { "epoch": 0.95, "learning_rate": 5.1356122612742734e-05, "loss": 0.0642, "theoretical_loss": 3.338293116458985, "tokens_seen": 3132096512 }, { "epoch": 0.95, "learning_rate": 5.1275878671160326e-05, "loss": 0.0665, "theoretical_loss": 3.3382720044082976, "tokens_seen": 3132358656 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.00031480283359996974, "objective/train/docs_used": 1137994, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3578956127166748, "objective/train/original_loss": 1.3578956127166748, "objective/train/theoretical_loss": 3.3382508946190534, "objective/train/tokens_used": 3153080800, "objective/train/value_avg": -0.007335662841796875, "objective/train/value_loss": 0.0002345583779970184, "objective/train/value_max": -3.7610530853271484e-05, "objective/train/value_min": -0.30712890625, "objective/train/value_reward_corr": 0.6553938720168582, "objective/train/value_std": 0.01401519775390625, "objective/train/weight_avg": 1.0004183053970337, "objective/train/weighted_lm_loss": 1.3580381870269775, "objective/train/weights_max": 1.2803038358688354, "objective/train/weights_min": 0.3710845410823822, "theoretical_loss": 3.3382508946190534, "tokens_seen": 3132620800 }, { "epoch": 0.95, "learning_rate": 5.119563472957792e-05, "loss": 0.065, "theoretical_loss": 3.3382508946190534, "tokens_seen": 3132620800 }, { "epoch": 0.95, "learning_rate": 5.1115390787995504e-05, "loss": 0.0641, "theoretical_loss": 3.33822978709082, "tokens_seen": 3132882944 }, { "epoch": 0.95, "learning_rate": 5.1035146846413096e-05, "loss": 0.0637, "theoretical_loss": 3.3382086818231667, "tokens_seen": 3133145088 }, { "epoch": 0.95, "learning_rate": 5.095490290483069e-05, "loss": 0.063, "theoretical_loss": 3.3381875788156616, "tokens_seen": 3133407232 }, { "epoch": 0.95, "learning_rate": 5.0874658963248274e-05, "loss": 0.0611, "theoretical_loss": 3.3381664780678744, "tokens_seen": 3133669376 }, { "epoch": 0.95, "learning_rate": 5.0794415021665867e-05, "loss": 0.065, "theoretical_loss": 3.338145379579373, "tokens_seen": 3133931520 }, { "epoch": 0.95, "learning_rate": 5.071417108008346e-05, "loss": 0.064, "theoretical_loss": 3.338124283349728, "tokens_seen": 3134193664 }, { "epoch": 0.95, "learning_rate": 5.0633927138501044e-05, "loss": 0.0664, "theoretical_loss": 3.338103189378508, "tokens_seen": 3134455808 }, { "epoch": 0.95, "learning_rate": 5.055368319691863e-05, "loss": 0.0641, "theoretical_loss": 3.338082097665282, "tokens_seen": 3134717952 }, { "epoch": 0.95, "learning_rate": 5.047343925533622e-05, "loss": 0.0658, "theoretical_loss": 3.33806100820962, "tokens_seen": 3134980096 }, { "epoch": 0.95, "learning_rate": 5.039319531375381e-05, "loss": 0.0638, "theoretical_loss": 3.338039921011091, "tokens_seen": 3135242240 }, { "epoch": 0.95, "learning_rate": 5.03129513721714e-05, "loss": 0.0638, "theoretical_loss": 3.3380188360692657, "tokens_seen": 3135504384 }, { "epoch": 0.95, "learning_rate": 5.023270743058899e-05, "loss": 0.0672, "theoretical_loss": 3.3379977533837133, "tokens_seen": 3135766528 }, { "epoch": 0.95, "learning_rate": 5.015246348900658e-05, "loss": 0.0691, "theoretical_loss": 3.3379766729540035, "tokens_seen": 3136028672 }, { "epoch": 0.95, "learning_rate": 5.007221954742417e-05, "loss": 0.0622, "theoretical_loss": 3.3379555947797073, "tokens_seen": 3136290816 }, { "epoch": 0.95, "learning_rate": 4.999197560584176e-05, "loss": 0.0654, "theoretical_loss": 3.337934518860394, "tokens_seen": 3136552960 }, { "epoch": 0.95, "learning_rate": 4.991173166425935e-05, "loss": 0.0653, "theoretical_loss": 3.3379134451956345, "tokens_seen": 3136815104 }, { "epoch": 0.95, "learning_rate": 4.983148772267694e-05, "loss": 0.0624, "theoretical_loss": 3.337892373784999, "tokens_seen": 3137077248 }, { "epoch": 0.95, "learning_rate": 4.975124378109453e-05, "loss": 0.0671, "theoretical_loss": 3.337871304628058, "tokens_seen": 3137339392 }, { "epoch": 0.95, "learning_rate": 4.967099983951212e-05, "loss": 0.0685, "theoretical_loss": 3.3378502377243824, "tokens_seen": 3137601536 }, { "epoch": 0.95, "learning_rate": 4.9590755897929703e-05, "loss": 0.0635, "theoretical_loss": 3.3378291730735428, "tokens_seen": 3137863680 }, { "epoch": 0.95, "learning_rate": 4.9510511956347296e-05, "loss": 0.0651, "theoretical_loss": 3.3378081106751103, "tokens_seen": 3138125824 }, { "epoch": 0.95, "learning_rate": 4.943026801476488e-05, "loss": 0.0637, "theoretical_loss": 3.3377870505286555, "tokens_seen": 3138387968 }, { "epoch": 0.95, "learning_rate": 4.9350024073182473e-05, "loss": 0.0655, "theoretical_loss": 3.3377659926337504, "tokens_seen": 3138650112 }, { "epoch": 0.95, "learning_rate": 4.9269780131600066e-05, "loss": 0.0638, "theoretical_loss": 3.3377449369899654, "tokens_seen": 3138912256 }, { "epoch": 0.95, "objective/train/advantage_avg": 2.2450554752140306e-05, "objective/train/docs_used": 1140387, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2468316555023193, "objective/train/original_loss": 1.2468316555023193, "objective/train/theoretical_loss": 3.337723883596872, "objective/train/tokens_used": 3159634400, "objective/train/value_avg": -0.00865936279296875, "objective/train/value_loss": 0.0004632585623767227, "objective/train/value_max": -1.3530254364013672e-05, "objective/train/value_min": -0.7841796875, "objective/train/value_reward_corr": 0.6389569430252178, "objective/train/value_std": 0.0171966552734375, "objective/train/weight_avg": 1.0002206563949585, "objective/train/weighted_lm_loss": 1.2470567226409912, "objective/train/weights_max": 1.8875012397766113, "objective/train/weights_min": 0.3693002760410309, "theoretical_loss": 3.337723883596872, "tokens_seen": 3139174400 }, { "epoch": 0.95, "learning_rate": 4.918953619001765e-05, "loss": 0.0635, "theoretical_loss": 3.337723883596872, "tokens_seen": 3139174400 }, { "epoch": 0.95, "learning_rate": 4.9109292248435244e-05, "loss": 0.0648, "theoretical_loss": 3.337702832454042, "tokens_seen": 3139436544 }, { "epoch": 0.95, "learning_rate": 4.9029048306852836e-05, "loss": 0.0626, "theoretical_loss": 3.337681783561047, "tokens_seen": 3139698688 }, { "epoch": 0.95, "learning_rate": 4.894880436527042e-05, "loss": 0.0658, "theoretical_loss": 3.3376607369174587, "tokens_seen": 3139960832 }, { "epoch": 0.95, "learning_rate": 4.8868560423688014e-05, "loss": 0.0642, "theoretical_loss": 3.337639692522849, "tokens_seen": 3140222976 }, { "epoch": 0.95, "learning_rate": 4.8788316482105606e-05, "loss": 0.0673, "theoretical_loss": 3.3376186503767897, "tokens_seen": 3140485120 }, { "epoch": 0.95, "learning_rate": 4.870807254052319e-05, "loss": 0.0657, "theoretical_loss": 3.3375976104788525, "tokens_seen": 3140747264 }, { "epoch": 0.95, "learning_rate": 4.862782859894078e-05, "loss": 0.0652, "theoretical_loss": 3.3375765728286106, "tokens_seen": 3141009408 }, { "epoch": 0.95, "learning_rate": 4.854758465735837e-05, "loss": 0.0656, "theoretical_loss": 3.3375555374256356, "tokens_seen": 3141271552 }, { "epoch": 0.95, "learning_rate": 4.8467340715775955e-05, "loss": 0.0632, "theoretical_loss": 3.3375345042695, "tokens_seen": 3141533696 }, { "epoch": 0.95, "learning_rate": 4.838709677419355e-05, "loss": 0.063, "theoretical_loss": 3.3375134733597767, "tokens_seen": 3141795840 }, { "epoch": 0.95, "learning_rate": 4.830685283261114e-05, "loss": 0.0647, "theoretical_loss": 3.337492444696038, "tokens_seen": 3142057984 }, { "epoch": 0.95, "learning_rate": 4.822660889102873e-05, "loss": 0.0666, "theoretical_loss": 3.3374714182778566, "tokens_seen": 3142320128 }, { "epoch": 0.95, "learning_rate": 4.814636494944632e-05, "loss": 0.0667, "theoretical_loss": 3.337450394104806, "tokens_seen": 3142582272 }, { "epoch": 0.95, "learning_rate": 4.806612100786391e-05, "loss": 0.0675, "theoretical_loss": 3.3374293721764587, "tokens_seen": 3142844416 }, { "epoch": 0.95, "learning_rate": 4.79858770662815e-05, "loss": 0.0624, "theoretical_loss": 3.337408352492388, "tokens_seen": 3143106560 }, { "epoch": 0.95, "learning_rate": 4.790563312469909e-05, "loss": 0.0655, "theoretical_loss": 3.3373873350521674, "tokens_seen": 3143368704 }, { "epoch": 0.95, "learning_rate": 4.782538918311668e-05, "loss": 0.0639, "theoretical_loss": 3.3373663198553696, "tokens_seen": 3143630848 }, { "epoch": 0.95, "learning_rate": 4.774514524153427e-05, "loss": 0.0664, "theoretical_loss": 3.3373453069015686, "tokens_seen": 3143892992 }, { "epoch": 0.95, "learning_rate": 4.766490129995185e-05, "loss": 0.0656, "theoretical_loss": 3.337324296190338, "tokens_seen": 3144155136 }, { "epoch": 0.95, "learning_rate": 4.758465735836944e-05, "loss": 0.0648, "theoretical_loss": 3.3373032877212516, "tokens_seen": 3144417280 }, { "epoch": 0.95, "learning_rate": 4.7504413416787035e-05, "loss": 0.0645, "theoretical_loss": 3.3372822814938825, "tokens_seen": 3144679424 }, { "epoch": 0.95, "learning_rate": 4.742416947520462e-05, "loss": 0.0642, "theoretical_loss": 3.337261277507806, "tokens_seen": 3144941568 }, { "epoch": 0.95, "learning_rate": 4.734392553362221e-05, "loss": 0.0646, "theoretical_loss": 3.3372402757625945, "tokens_seen": 3145203712 }, { "epoch": 0.95, "learning_rate": 4.7263681592039805e-05, "loss": 0.0657, "theoretical_loss": 3.337219276257824, "tokens_seen": 3145465856 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.0006178407347761095, "objective/train/docs_used": 1142692, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2589737176895142, "objective/train/original_loss": 1.258973479270935, "objective/train/theoretical_loss": 3.3371982789930668, "objective/train/tokens_used": 3166188000, "objective/train/value_avg": -0.006740570068359375, "objective/train/value_loss": 0.00018427084432914853, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.6171875, "objective/train/value_reward_corr": 0.7213768801644866, "objective/train/value_std": 0.014190673828125, "objective/train/weight_avg": 1.000701904296875, "objective/train/weighted_lm_loss": 1.2594910860061646, "objective/train/weights_max": 1.3403171300888062, "objective/train/weights_min": 0.36926716566085815, "theoretical_loss": 3.3371982789930668, "tokens_seen": 3145728000 }, { "epoch": 0.95, "learning_rate": 4.718343765045739e-05, "loss": 0.0619, "theoretical_loss": 3.3371982789930668, "tokens_seen": 3145728000 }, { "epoch": 0.95, "learning_rate": 4.710319370887498e-05, "loss": 0.0642, "theoretical_loss": 3.3371772839678995, "tokens_seen": 3145990144 }, { "epoch": 0.95, "learning_rate": 4.7022949767292575e-05, "loss": 0.0647, "theoretical_loss": 3.337156291181895, "tokens_seen": 3146252288 }, { "epoch": 0.95, "learning_rate": 4.694270582571016e-05, "loss": 0.0653, "theoretical_loss": 3.3371353006346283, "tokens_seen": 3146514432 }, { "epoch": 0.95, "learning_rate": 4.686246188412775e-05, "loss": 0.0622, "theoretical_loss": 3.3371143123256743, "tokens_seen": 3146776576 }, { "epoch": 0.95, "learning_rate": 4.678221794254534e-05, "loss": 0.0658, "theoretical_loss": 3.3370933262546085, "tokens_seen": 3147038720 }, { "epoch": 0.95, "learning_rate": 4.6701974000962924e-05, "loss": 0.0652, "theoretical_loss": 3.337072342421005, "tokens_seen": 3147300864 }, { "epoch": 0.95, "learning_rate": 4.6621730059380516e-05, "loss": 0.0643, "theoretical_loss": 3.3370513608244394, "tokens_seen": 3147563008 }, { "epoch": 0.95, "learning_rate": 4.654148611779811e-05, "loss": 0.0643, "theoretical_loss": 3.3370303814644866, "tokens_seen": 3147825152 }, { "epoch": 0.95, "learning_rate": 4.6461242176215694e-05, "loss": 0.0642, "theoretical_loss": 3.337009404340722, "tokens_seen": 3148087296 }, { "epoch": 0.95, "learning_rate": 4.6380998234633286e-05, "loss": 0.0639, "theoretical_loss": 3.3369884294527217, "tokens_seen": 3148349440 }, { "epoch": 0.95, "learning_rate": 4.630075429305088e-05, "loss": 0.0672, "theoretical_loss": 3.3369674568000605, "tokens_seen": 3148611584 }, { "epoch": 0.95, "learning_rate": 4.6220510351468464e-05, "loss": 0.0647, "theoretical_loss": 3.336946486382314, "tokens_seen": 3148873728 }, { "epoch": 0.95, "learning_rate": 4.6140266409886056e-05, "loss": 0.0608, "theoretical_loss": 3.336925518199059, "tokens_seen": 3149135872 }, { "epoch": 0.95, "learning_rate": 4.606002246830365e-05, "loss": 0.0654, "theoretical_loss": 3.336904552249871, "tokens_seen": 3149398016 }, { "epoch": 0.95, "learning_rate": 4.5979778526721234e-05, "loss": 0.0636, "theoretical_loss": 3.3368835885343255, "tokens_seen": 3149660160 }, { "epoch": 0.95, "learning_rate": 4.5899534585138826e-05, "loss": 0.0635, "theoretical_loss": 3.3368626270519988, "tokens_seen": 3149922304 }, { "epoch": 0.95, "learning_rate": 4.581929064355641e-05, "loss": 0.065, "theoretical_loss": 3.3368416678024677, "tokens_seen": 3150184448 }, { "epoch": 0.95, "learning_rate": 4.5739046701974e-05, "loss": 0.0655, "theoretical_loss": 3.3368207107853083, "tokens_seen": 3150446592 }, { "epoch": 0.95, "learning_rate": 4.565880276039159e-05, "loss": 0.0638, "theoretical_loss": 3.3367997560000973, "tokens_seen": 3150708736 }, { "epoch": 0.95, "learning_rate": 4.557855881880918e-05, "loss": 0.0654, "theoretical_loss": 3.3367788034464105, "tokens_seen": 3150970880 }, { "epoch": 0.95, "learning_rate": 4.549831487722677e-05, "loss": 0.0627, "theoretical_loss": 3.336757853123826, "tokens_seen": 3151233024 }, { "epoch": 0.96, "learning_rate": 4.541807093564436e-05, "loss": 0.0659, "theoretical_loss": 3.3367369050319193, "tokens_seen": 3151495168 }, { "epoch": 0.96, "learning_rate": 4.533782699406195e-05, "loss": 0.064, "theoretical_loss": 3.3367159591702684, "tokens_seen": 3151757312 }, { "epoch": 0.96, "learning_rate": 4.525758305247954e-05, "loss": 0.0631, "theoretical_loss": 3.33669501553845, "tokens_seen": 3152019456 }, { "epoch": 0.96, "objective/train/advantage_avg": -0.00011186428309883922, "objective/train/docs_used": 1145159, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2790420055389404, "objective/train/original_loss": 1.2790418863296509, "objective/train/theoretical_loss": 3.336674074136041, "objective/train/tokens_used": 3172741600, "objective/train/value_avg": -0.00704193115234375, "objective/train/value_loss": 0.00023630002397112548, "objective/train/value_max": -2.390146255493164e-05, "objective/train/value_min": -0.98193359375, "objective/train/value_reward_corr": 0.8191802793083187, "objective/train/value_std": 0.02069091796875, "objective/train/weight_avg": 0.9999978542327881, "objective/train/weighted_lm_loss": 1.2786507606506348, "objective/train/weights_max": 1.631901741027832, "objective/train/weights_min": 0.373872309923172, "theoretical_loss": 3.336674074136041, "tokens_seen": 3152281600 }, { "epoch": 0.96, "learning_rate": 4.517733911089713e-05, "loss": 0.065, "theoretical_loss": 3.336674074136041, "tokens_seen": 3152281600 }, { "epoch": 0.96, "learning_rate": 4.509709516931472e-05, "loss": 0.0621, "theoretical_loss": 3.336653134962619, "tokens_seen": 3152543744 }, { "epoch": 0.96, "learning_rate": 4.501685122773231e-05, "loss": 0.0656, "theoretical_loss": 3.3366321980177616, "tokens_seen": 3152805888 }, { "epoch": 0.96, "learning_rate": 4.49366072861499e-05, "loss": 0.0623, "theoretical_loss": 3.3366112633010463, "tokens_seen": 3153068032 }, { "epoch": 0.96, "learning_rate": 4.4856363344567486e-05, "loss": 0.0658, "theoretical_loss": 3.3365903308120504, "tokens_seen": 3153330176 }, { "epoch": 0.96, "learning_rate": 4.477611940298507e-05, "loss": 0.0627, "theoretical_loss": 3.336569400550352, "tokens_seen": 3153592320 }, { "epoch": 0.96, "learning_rate": 4.469587546140266e-05, "loss": 0.0608, "theoretical_loss": 3.3365484725155286, "tokens_seen": 3153854464 }, { "epoch": 0.96, "learning_rate": 4.4615631519820256e-05, "loss": 0.063, "theoretical_loss": 3.3365275467071585, "tokens_seen": 3154116608 }, { "epoch": 0.96, "learning_rate": 4.453538757823784e-05, "loss": 0.0615, "theoretical_loss": 3.33650662312482, "tokens_seen": 3154378752 }, { "epoch": 0.96, "learning_rate": 4.445514363665543e-05, "loss": 0.0635, "theoretical_loss": 3.3364857017680913, "tokens_seen": 3154640896 }, { "epoch": 0.96, "learning_rate": 4.4374899695073026e-05, "loss": 0.0654, "theoretical_loss": 3.3364647826365506, "tokens_seen": 3154903040 }, { "epoch": 0.96, "learning_rate": 4.429465575349061e-05, "loss": 0.0657, "theoretical_loss": 3.3364438657297764, "tokens_seen": 3155165184 }, { "epoch": 0.96, "learning_rate": 4.4214411811908203e-05, "loss": 0.0642, "theoretical_loss": 3.3364229510473473, "tokens_seen": 3155427328 }, { "epoch": 0.96, "learning_rate": 4.4134167870325796e-05, "loss": 0.0647, "theoretical_loss": 3.3364020385888415, "tokens_seen": 3155689472 }, { "epoch": 0.96, "learning_rate": 4.405392392874338e-05, "loss": 0.0658, "theoretical_loss": 3.3363811283538385, "tokens_seen": 3155951616 }, { "epoch": 0.96, "learning_rate": 4.3973679987160974e-05, "loss": 0.0621, "theoretical_loss": 3.336360220341917, "tokens_seen": 3156213760 }, { "epoch": 0.96, "learning_rate": 4.389343604557856e-05, "loss": 0.0642, "theoretical_loss": 3.3363393145526565, "tokens_seen": 3156475904 }, { "epoch": 0.96, "learning_rate": 4.3813192103996145e-05, "loss": 0.0645, "theoretical_loss": 3.336318410985635, "tokens_seen": 3156738048 }, { "epoch": 0.96, "learning_rate": 4.373294816241374e-05, "loss": 0.0659, "theoretical_loss": 3.336297509640433, "tokens_seen": 3157000192 }, { "epoch": 0.96, "learning_rate": 4.365270422083133e-05, "loss": 0.0639, "theoretical_loss": 3.336276610516629, "tokens_seen": 3157262336 }, { "epoch": 0.96, "learning_rate": 4.3572460279248915e-05, "loss": 0.0657, "theoretical_loss": 3.3362557136138027, "tokens_seen": 3157524480 }, { "epoch": 0.96, "learning_rate": 4.349221633766651e-05, "loss": 0.0629, "theoretical_loss": 3.3362348189315343, "tokens_seen": 3157786624 }, { "epoch": 0.96, "learning_rate": 4.34119723960841e-05, "loss": 0.0618, "theoretical_loss": 3.3362139264694024, "tokens_seen": 3158048768 }, { "epoch": 0.96, "learning_rate": 4.3331728454501685e-05, "loss": 0.0667, "theoretical_loss": 3.3361930362269883, "tokens_seen": 3158310912 }, { "epoch": 0.96, "learning_rate": 4.325148451291928e-05, "loss": 0.0633, "theoretical_loss": 3.336172148203871, "tokens_seen": 3158573056 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.0004656356177292764, "objective/train/docs_used": 1147634, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2245007753372192, "objective/train/original_loss": 1.2245006561279297, "objective/train/theoretical_loss": 3.33615126239963, "objective/train/tokens_used": 3179295200, "objective/train/value_avg": -0.006221771240234375, "objective/train/value_loss": 0.00020156294340267777, "objective/train/value_max": -1.1563301086425781e-05, "objective/train/value_min": -0.97802734375, "objective/train/value_reward_corr": 0.617885544008769, "objective/train/value_std": 0.0126495361328125, "objective/train/weight_avg": 1.0005531311035156, "objective/train/weighted_lm_loss": 1.2248764038085938, "objective/train/weights_max": 1.505949854850769, "objective/train/weights_min": 0.3681730329990387, "theoretical_loss": 3.33615126239963, "tokens_seen": 3158835200 }, { "epoch": 0.96, "learning_rate": 4.317124057133687e-05, "loss": 0.0664, "theoretical_loss": 3.33615126239963, "tokens_seen": 3158835200 }, { "epoch": 0.96, "learning_rate": 4.3090996629754455e-05, "loss": 0.0644, "theoretical_loss": 3.336130378813847, "tokens_seen": 3159097344 }, { "epoch": 0.96, "learning_rate": 4.301075268817205e-05, "loss": 0.0628, "theoretical_loss": 3.3361094974461016, "tokens_seen": 3159359488 }, { "epoch": 0.96, "learning_rate": 4.293050874658963e-05, "loss": 0.0661, "theoretical_loss": 3.3360886182959737, "tokens_seen": 3159621632 }, { "epoch": 0.96, "learning_rate": 4.285026480500722e-05, "loss": 0.0648, "theoretical_loss": 3.3360677413630446, "tokens_seen": 3159883776 }, { "epoch": 0.96, "learning_rate": 4.277002086342481e-05, "loss": 0.0681, "theoretical_loss": 3.3360468666468943, "tokens_seen": 3160145920 }, { "epoch": 0.96, "learning_rate": 4.26897769218424e-05, "loss": 0.063, "theoretical_loss": 3.336025994147104, "tokens_seen": 3160408064 }, { "epoch": 0.96, "learning_rate": 4.260953298025999e-05, "loss": 0.0635, "theoretical_loss": 3.3360051238632544, "tokens_seen": 3160670208 }, { "epoch": 0.96, "learning_rate": 4.252928903867758e-05, "loss": 0.0648, "theoretical_loss": 3.3359842557949264, "tokens_seen": 3160932352 }, { "epoch": 0.96, "learning_rate": 4.244904509709517e-05, "loss": 0.0618, "theoretical_loss": 3.3359633899417016, "tokens_seen": 3161194496 }, { "epoch": 0.96, "learning_rate": 4.236880115551276e-05, "loss": 0.0635, "theoretical_loss": 3.335942526303161, "tokens_seen": 3161456640 }, { "epoch": 0.96, "learning_rate": 4.228855721393035e-05, "loss": 0.0615, "theoretical_loss": 3.335921664878885, "tokens_seen": 3161718784 }, { "epoch": 0.96, "learning_rate": 4.220831327234794e-05, "loss": 0.0654, "theoretical_loss": 3.3359008056684565, "tokens_seen": 3161980928 }, { "epoch": 0.96, "learning_rate": 4.212806933076553e-05, "loss": 0.0619, "theoretical_loss": 3.335879948671456, "tokens_seen": 3162243072 }, { "epoch": 0.96, "learning_rate": 4.2047825389183114e-05, "loss": 0.0649, "theoretical_loss": 3.3358590938874655, "tokens_seen": 3162505216 }, { "epoch": 0.96, "learning_rate": 4.1967581447600706e-05, "loss": 0.0643, "theoretical_loss": 3.335838241316067, "tokens_seen": 3162767360 }, { "epoch": 0.96, "learning_rate": 4.188733750601829e-05, "loss": 0.0639, "theoretical_loss": 3.335817390956842, "tokens_seen": 3163029504 }, { "epoch": 0.96, "learning_rate": 4.1807093564435884e-05, "loss": 0.0613, "theoretical_loss": 3.3357965428093728, "tokens_seen": 3163291648 }, { "epoch": 0.96, "learning_rate": 4.1726849622853476e-05, "loss": 0.0649, "theoretical_loss": 3.3357756968732413, "tokens_seen": 3163553792 }, { "epoch": 0.96, "learning_rate": 4.164660568127106e-05, "loss": 0.0659, "theoretical_loss": 3.33575485314803, "tokens_seen": 3163815936 }, { "epoch": 0.96, "learning_rate": 4.1566361739688654e-05, "loss": 0.065, "theoretical_loss": 3.335734011633321, "tokens_seen": 3164078080 }, { "epoch": 0.96, "learning_rate": 4.1486117798106246e-05, "loss": 0.0661, "theoretical_loss": 3.3357131723286972, "tokens_seen": 3164340224 }, { "epoch": 0.96, "learning_rate": 4.140587385652383e-05, "loss": 0.0639, "theoretical_loss": 3.33569233523374, "tokens_seen": 3164602368 }, { "epoch": 0.96, "learning_rate": 4.1325629914941424e-05, "loss": 0.0661, "theoretical_loss": 3.3356715003480337, "tokens_seen": 3164864512 }, { "epoch": 0.96, "learning_rate": 4.1245385973359016e-05, "loss": 0.065, "theoretical_loss": 3.3356506676711604, "tokens_seen": 3165126656 }, { "epoch": 0.96, "objective/train/advantage_avg": -0.00016843386401887983, "objective/train/docs_used": 1150157, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2679888010025024, "objective/train/original_loss": 1.267988920211792, "objective/train/theoretical_loss": 3.3356298372027027, "objective/train/tokens_used": 3185848800, "objective/train/value_avg": -0.006591796875, "objective/train/value_loss": 0.00019000943575520068, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.6064453125, "objective/train/value_reward_corr": 0.6557403656697849, "objective/train/value_std": 0.012451171875, "objective/train/weight_avg": 0.9999187588691711, "objective/train/weighted_lm_loss": 1.2679390907287598, "objective/train/weights_max": 1.687270164489746, "objective/train/weights_min": 0.3683769106864929, "theoretical_loss": 3.3356298372027027, "tokens_seen": 3165388800 }, { "epoch": 0.96, "learning_rate": 4.11651420317766e-05, "loss": 0.0659, "theoretical_loss": 3.3356298372027027, "tokens_seen": 3165388800 }, { "epoch": 0.96, "learning_rate": 4.108489809019419e-05, "loss": 0.0629, "theoretical_loss": 3.335609008942244, "tokens_seen": 3165650944 }, { "epoch": 0.96, "learning_rate": 4.100465414861178e-05, "loss": 0.0623, "theoretical_loss": 3.335588182889367, "tokens_seen": 3165913088 }, { "epoch": 0.96, "learning_rate": 4.0924410207029365e-05, "loss": 0.0623, "theoretical_loss": 3.3355673590436554, "tokens_seen": 3166175232 }, { "epoch": 0.96, "learning_rate": 4.084416626544696e-05, "loss": 0.0615, "theoretical_loss": 3.3355465374046926, "tokens_seen": 3166437376 }, { "epoch": 0.96, "learning_rate": 4.076392232386455e-05, "loss": 0.066, "theoretical_loss": 3.335525717972062, "tokens_seen": 3166699520 }, { "epoch": 0.96, "learning_rate": 4.0683678382282135e-05, "loss": 0.0649, "theoretical_loss": 3.335504900745347, "tokens_seen": 3166961664 }, { "epoch": 0.96, "learning_rate": 4.060343444069973e-05, "loss": 0.0656, "theoretical_loss": 3.335484085724132, "tokens_seen": 3167223808 }, { "epoch": 0.96, "learning_rate": 4.052319049911732e-05, "loss": 0.062, "theoretical_loss": 3.335463272907999, "tokens_seen": 3167485952 }, { "epoch": 0.96, "learning_rate": 4.0442946557534905e-05, "loss": 0.0653, "theoretical_loss": 3.335442462296534, "tokens_seen": 3167748096 }, { "epoch": 0.96, "learning_rate": 4.03627026159525e-05, "loss": 0.0629, "theoretical_loss": 3.3354216538893207, "tokens_seen": 3168010240 }, { "epoch": 0.96, "learning_rate": 4.028245867437009e-05, "loss": 0.0671, "theoretical_loss": 3.335400847685942, "tokens_seen": 3168272384 }, { "epoch": 0.96, "learning_rate": 4.0202214732787675e-05, "loss": 0.0656, "theoretical_loss": 3.335380043685983, "tokens_seen": 3168534528 }, { "epoch": 0.96, "learning_rate": 4.012197079120526e-05, "loss": 0.0647, "theoretical_loss": 3.3353592418890283, "tokens_seen": 3168796672 }, { "epoch": 0.96, "learning_rate": 4.004172684962285e-05, "loss": 0.0617, "theoretical_loss": 3.3353384422946624, "tokens_seen": 3169058816 }, { "epoch": 0.96, "learning_rate": 3.996148290804044e-05, "loss": 0.0631, "theoretical_loss": 3.335317644902469, "tokens_seen": 3169320960 }, { "epoch": 0.96, "learning_rate": 3.988123896645803e-05, "loss": 0.0642, "theoretical_loss": 3.335296849712034, "tokens_seen": 3169583104 }, { "epoch": 0.96, "learning_rate": 3.980099502487562e-05, "loss": 0.0641, "theoretical_loss": 3.335276056722942, "tokens_seen": 3169845248 }, { "epoch": 0.96, "learning_rate": 3.972075108329321e-05, "loss": 0.0651, "theoretical_loss": 3.3352552659347774, "tokens_seen": 3170107392 }, { "epoch": 0.96, "learning_rate": 3.96405071417108e-05, "loss": 0.0642, "theoretical_loss": 3.335234477347125, "tokens_seen": 3170369536 }, { "epoch": 0.96, "learning_rate": 3.956026320012839e-05, "loss": 0.0615, "theoretical_loss": 3.3352136909595713, "tokens_seen": 3170631680 }, { "epoch": 0.96, "learning_rate": 3.9480019258545986e-05, "loss": 0.0638, "theoretical_loss": 3.3351929067717005, "tokens_seen": 3170893824 }, { "epoch": 0.96, "learning_rate": 3.939977531696357e-05, "loss": 0.0641, "theoretical_loss": 3.335172124783098, "tokens_seen": 3171155968 }, { "epoch": 0.96, "learning_rate": 3.931953137538116e-05, "loss": 0.0655, "theoretical_loss": 3.33515134499335, "tokens_seen": 3171418112 }, { "epoch": 0.96, "learning_rate": 3.9239287433798756e-05, "loss": 0.0643, "theoretical_loss": 3.3351305674020417, "tokens_seen": 3171680256 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.0003502919862512499, "objective/train/docs_used": 1152731, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2793630361557007, "objective/train/original_loss": 1.2793629169464111, "objective/train/theoretical_loss": 3.3351097920087587, "objective/train/tokens_used": 3192402400, "objective/train/value_avg": -0.00690460205078125, "objective/train/value_loss": 0.00017006472626235336, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.81591796875, "objective/train/value_reward_corr": 0.7036529803661943, "objective/train/value_std": 0.01306915283203125, "objective/train/weight_avg": 1.0004277229309082, "objective/train/weighted_lm_loss": 1.2795383930206299, "objective/train/weights_max": 1.226123332977295, "objective/train/weights_min": 0.3684985041618347, "theoretical_loss": 3.3351097920087587, "tokens_seen": 3171942400 }, { "epoch": 0.96, "learning_rate": 3.9159043492216334e-05, "loss": 0.0648, "theoretical_loss": 3.3351097920087587, "tokens_seen": 3171942400 }, { "epoch": 0.96, "learning_rate": 3.907879955063393e-05, "loss": 0.0643, "theoretical_loss": 3.335089018813087, "tokens_seen": 3172204544 }, { "epoch": 0.96, "learning_rate": 3.899855560905152e-05, "loss": 0.0642, "theoretical_loss": 3.3350682478146125, "tokens_seen": 3172466688 }, { "epoch": 0.96, "learning_rate": 3.8918311667469104e-05, "loss": 0.0658, "theoretical_loss": 3.3350474790129216, "tokens_seen": 3172728832 }, { "epoch": 0.96, "learning_rate": 3.88380677258867e-05, "loss": 0.062, "theoretical_loss": 3.3350267124076, "tokens_seen": 3172990976 }, { "epoch": 0.96, "learning_rate": 3.875782378430429e-05, "loss": 0.0668, "theoretical_loss": 3.335005947998235, "tokens_seen": 3173253120 }, { "epoch": 0.96, "learning_rate": 3.8677579842721875e-05, "loss": 0.0641, "theoretical_loss": 3.3349851857844115, "tokens_seen": 3173515264 }, { "epoch": 0.96, "learning_rate": 3.859733590113947e-05, "loss": 0.0629, "theoretical_loss": 3.3349644257657167, "tokens_seen": 3173777408 }, { "epoch": 0.96, "learning_rate": 3.851709195955706e-05, "loss": 0.0642, "theoretical_loss": 3.3349436679417375, "tokens_seen": 3174039552 }, { "epoch": 0.96, "learning_rate": 3.8436848017974645e-05, "loss": 0.063, "theoretical_loss": 3.3349229123120603, "tokens_seen": 3174301696 }, { "epoch": 0.96, "learning_rate": 3.835660407639224e-05, "loss": 0.0661, "theoretical_loss": 3.334902158876272, "tokens_seen": 3174563840 }, { "epoch": 0.96, "learning_rate": 3.827636013480982e-05, "loss": 0.0644, "theoretical_loss": 3.33488140763396, "tokens_seen": 3174825984 }, { "epoch": 0.96, "learning_rate": 3.819611619322741e-05, "loss": 0.0649, "theoretical_loss": 3.334860658584711, "tokens_seen": 3175088128 }, { "epoch": 0.96, "learning_rate": 3.8115872251645e-05, "loss": 0.0642, "theoretical_loss": 3.3348399117281122, "tokens_seen": 3175350272 }, { "epoch": 0.96, "learning_rate": 3.803562831006259e-05, "loss": 0.0633, "theoretical_loss": 3.3348191670637513, "tokens_seen": 3175612416 }, { "epoch": 0.96, "learning_rate": 3.795538436848018e-05, "loss": 0.066, "theoretical_loss": 3.334798424591215, "tokens_seen": 3175874560 }, { "epoch": 0.96, "learning_rate": 3.787514042689777e-05, "loss": 0.0649, "theoretical_loss": 3.334777684310091, "tokens_seen": 3176136704 }, { "epoch": 0.96, "learning_rate": 3.779489648531536e-05, "loss": 0.0666, "theoretical_loss": 3.3347569462199678, "tokens_seen": 3176398848 }, { "epoch": 0.96, "learning_rate": 3.771465254373295e-05, "loss": 0.0669, "theoretical_loss": 3.3347362103204317, "tokens_seen": 3176660992 }, { "epoch": 0.96, "learning_rate": 3.763440860215054e-05, "loss": 0.0632, "theoretical_loss": 3.3347154766110716, "tokens_seen": 3176923136 }, { "epoch": 0.96, "learning_rate": 3.755416466056813e-05, "loss": 0.0631, "theoretical_loss": 3.3346947450914755, "tokens_seen": 3177185280 }, { "epoch": 0.96, "learning_rate": 3.747392071898572e-05, "loss": 0.0649, "theoretical_loss": 3.334674015761231, "tokens_seen": 3177447424 }, { "epoch": 0.96, "learning_rate": 3.739367677740331e-05, "loss": 0.0642, "theoretical_loss": 3.3346532886199265, "tokens_seen": 3177709568 }, { "epoch": 0.96, "learning_rate": 3.7313432835820896e-05, "loss": 0.0636, "theoretical_loss": 3.3346325636671503, "tokens_seen": 3177971712 }, { "epoch": 0.96, "learning_rate": 3.723318889423848e-05, "loss": 0.0634, "theoretical_loss": 3.334611840902491, "tokens_seen": 3178233856 }, { "epoch": 0.96, "objective/train/advantage_avg": 2.5288290999014862e-05, "objective/train/docs_used": 1155119, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3060029745101929, "objective/train/original_loss": 1.3060028553009033, "objective/train/theoretical_loss": 3.3345911203255367, "objective/train/tokens_used": 3198956000, "objective/train/value_avg": -0.00821685791015625, "objective/train/value_loss": 0.00039423233829438686, "objective/train/value_max": -2.092123031616211e-05, "objective/train/value_min": -0.9677734375, "objective/train/value_reward_corr": 0.6660958908671027, "objective/train/value_std": 0.0167999267578125, "objective/train/weight_avg": 1.0001922845840454, "objective/train/weighted_lm_loss": 1.3057191371917725, "objective/train/weights_max": 1.4924074411392212, "objective/train/weights_min": 0.3686675727367401, "theoretical_loss": 3.3345911203255367, "tokens_seen": 3178496000 }, { "epoch": 0.96, "learning_rate": 3.7152944952656074e-05, "loss": 0.0628, "theoretical_loss": 3.3345911203255367, "tokens_seen": 3178496000 }, { "epoch": 0.96, "learning_rate": 3.7072701011073666e-05, "loss": 0.065, "theoretical_loss": 3.334570401935877, "tokens_seen": 3178758144 }, { "epoch": 0.96, "learning_rate": 3.699245706949125e-05, "loss": 0.0641, "theoretical_loss": 3.334549685733099, "tokens_seen": 3179020288 }, { "epoch": 0.96, "learning_rate": 3.6912213127908844e-05, "loss": 0.0639, "theoretical_loss": 3.334528971716793, "tokens_seen": 3179282432 }, { "epoch": 0.96, "learning_rate": 3.6831969186326436e-05, "loss": 0.0643, "theoretical_loss": 3.3345082598865474, "tokens_seen": 3179544576 }, { "epoch": 0.96, "learning_rate": 3.675172524474402e-05, "loss": 0.0648, "theoretical_loss": 3.3344875502419513, "tokens_seen": 3179806720 }, { "epoch": 0.96, "learning_rate": 3.6671481303161614e-05, "loss": 0.0634, "theoretical_loss": 3.3344668427825943, "tokens_seen": 3180068864 }, { "epoch": 0.96, "learning_rate": 3.6591237361579206e-05, "loss": 0.0675, "theoretical_loss": 3.334446137508065, "tokens_seen": 3180331008 }, { "epoch": 0.96, "learning_rate": 3.651099341999679e-05, "loss": 0.0638, "theoretical_loss": 3.3344254344179536, "tokens_seen": 3180593152 }, { "epoch": 0.96, "learning_rate": 3.6430749478414384e-05, "loss": 0.0651, "theoretical_loss": 3.334404733511849, "tokens_seen": 3180855296 }, { "epoch": 0.96, "learning_rate": 3.635050553683197e-05, "loss": 0.0654, "theoretical_loss": 3.334384034789341, "tokens_seen": 3181117440 }, { "epoch": 0.96, "learning_rate": 3.6270261595249555e-05, "loss": 0.0638, "theoretical_loss": 3.33436333825002, "tokens_seen": 3181379584 }, { "epoch": 0.96, "learning_rate": 3.619001765366715e-05, "loss": 0.0685, "theoretical_loss": 3.334342643893475, "tokens_seen": 3181641728 }, { "epoch": 0.96, "learning_rate": 3.610977371208474e-05, "loss": 0.0656, "theoretical_loss": 3.3343219517192964, "tokens_seen": 3181903872 }, { "epoch": 0.96, "learning_rate": 3.6029529770502325e-05, "loss": 0.0659, "theoretical_loss": 3.334301261727074, "tokens_seen": 3182166016 }, { "epoch": 0.96, "learning_rate": 3.594928582891992e-05, "loss": 0.0664, "theoretical_loss": 3.3342805739163985, "tokens_seen": 3182428160 }, { "epoch": 0.96, "learning_rate": 3.586904188733751e-05, "loss": 0.0644, "theoretical_loss": 3.3342598882868595, "tokens_seen": 3182690304 }, { "epoch": 0.96, "learning_rate": 3.5788797945755095e-05, "loss": 0.0662, "theoretical_loss": 3.334239204838048, "tokens_seen": 3182952448 }, { "epoch": 0.96, "learning_rate": 3.570855400417269e-05, "loss": 0.0635, "theoretical_loss": 3.3342185235695543, "tokens_seen": 3183214592 }, { "epoch": 0.96, "learning_rate": 3.562831006259028e-05, "loss": 0.067, "theoretical_loss": 3.334197844480969, "tokens_seen": 3183476736 }, { "epoch": 0.96, "learning_rate": 3.5548066121007865e-05, "loss": 0.0673, "theoretical_loss": 3.334177167571883, "tokens_seen": 3183738880 }, { "epoch": 0.96, "learning_rate": 3.546782217942546e-05, "loss": 0.0641, "theoretical_loss": 3.334156492841887, "tokens_seen": 3184001024 }, { "epoch": 0.96, "learning_rate": 3.538757823784304e-05, "loss": 0.0665, "theoretical_loss": 3.3341358202905718, "tokens_seen": 3184263168 }, { "epoch": 0.97, "learning_rate": 3.530733429626063e-05, "loss": 0.0652, "theoretical_loss": 3.334115149917529, "tokens_seen": 3184525312 }, { "epoch": 0.97, "learning_rate": 3.522709035467822e-05, "loss": 0.0658, "theoretical_loss": 3.3340944817223495, "tokens_seen": 3184787456 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.0005753776640631258, "objective/train/docs_used": 1157476, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.251091718673706, "objective/train/original_loss": 1.251091718673706, "objective/train/theoretical_loss": 3.334073815704625, "objective/train/tokens_used": 3205509600, "objective/train/value_avg": -0.00763702392578125, "objective/train/value_loss": 0.0001186743684229441, "objective/train/value_max": -2.4318695068359375e-05, "objective/train/value_min": -0.250732421875, "objective/train/value_reward_corr": 0.8103845478100814, "objective/train/value_std": 0.0146942138671875, "objective/train/weight_avg": 1.0006309747695923, "objective/train/weighted_lm_loss": 1.2523066997528076, "objective/train/weights_max": 1.284966230392456, "objective/train/weights_min": 0.37953981757164, "theoretical_loss": 3.334073815704625, "tokens_seen": 3185049600 }, { "epoch": 0.97, "learning_rate": 3.514684641309581e-05, "loss": 0.0626, "theoretical_loss": 3.334073815704625, "tokens_seen": 3185049600 }, { "epoch": 0.97, "learning_rate": 3.50666024715134e-05, "loss": 0.0669, "theoretical_loss": 3.3340531518639462, "tokens_seen": 3185311744 }, { "epoch": 0.97, "learning_rate": 3.498635852993099e-05, "loss": 0.0671, "theoretical_loss": 3.3340324901999048, "tokens_seen": 3185573888 }, { "epoch": 0.97, "learning_rate": 3.490611458834858e-05, "loss": 0.0632, "theoretical_loss": 3.3340118307120927, "tokens_seen": 3185836032 }, { "epoch": 0.97, "learning_rate": 3.482587064676617e-05, "loss": 0.066, "theoretical_loss": 3.3339911734001015, "tokens_seen": 3186098176 }, { "epoch": 0.97, "learning_rate": 3.474562670518376e-05, "loss": 0.0664, "theoretical_loss": 3.3339705182635235, "tokens_seen": 3186360320 }, { "epoch": 0.97, "learning_rate": 3.466538276360135e-05, "loss": 0.0667, "theoretical_loss": 3.3339498653019497, "tokens_seen": 3186622464 }, { "epoch": 0.97, "learning_rate": 3.458513882201894e-05, "loss": 0.0658, "theoretical_loss": 3.3339292145149733, "tokens_seen": 3186884608 }, { "epoch": 0.97, "learning_rate": 3.450489488043653e-05, "loss": 0.065, "theoretical_loss": 3.3339085659021857, "tokens_seen": 3187146752 }, { "epoch": 0.97, "learning_rate": 3.4424650938854116e-05, "loss": 0.0627, "theoretical_loss": 3.333887919463179, "tokens_seen": 3187408896 }, { "epoch": 0.97, "learning_rate": 3.43444069972717e-05, "loss": 0.0645, "theoretical_loss": 3.333867275197546, "tokens_seen": 3187671040 }, { "epoch": 0.97, "learning_rate": 3.4264163055689294e-05, "loss": 0.0652, "theoretical_loss": 3.33384663310488, "tokens_seen": 3187933184 }, { "epoch": 0.97, "learning_rate": 3.4183919114106887e-05, "loss": 0.0637, "theoretical_loss": 3.3338259931847727, "tokens_seen": 3188195328 }, { "epoch": 0.97, "learning_rate": 3.410367517252447e-05, "loss": 0.0655, "theoretical_loss": 3.3338053554368163, "tokens_seen": 3188457472 }, { "epoch": 0.97, "learning_rate": 3.4023431230942064e-05, "loss": 0.0675, "theoretical_loss": 3.333784719860605, "tokens_seen": 3188719616 }, { "epoch": 0.97, "learning_rate": 3.394318728935966e-05, "loss": 0.0642, "theoretical_loss": 3.333764086455731, "tokens_seen": 3188981760 }, { "epoch": 0.97, "learning_rate": 3.386294334777724e-05, "loss": 0.0622, "theoretical_loss": 3.333743455221787, "tokens_seen": 3189243904 }, { "epoch": 0.97, "learning_rate": 3.3782699406194834e-05, "loss": 0.0631, "theoretical_loss": 3.333722826158367, "tokens_seen": 3189506048 }, { "epoch": 0.97, "learning_rate": 3.370245546461243e-05, "loss": 0.064, "theoretical_loss": 3.3337021992650637, "tokens_seen": 3189768192 }, { "epoch": 0.97, "learning_rate": 3.362221152303001e-05, "loss": 0.0626, "theoretical_loss": 3.3336815745414707, "tokens_seen": 3190030336 }, { "epoch": 0.97, "learning_rate": 3.35419675814476e-05, "loss": 0.0644, "theoretical_loss": 3.3336609519871816, "tokens_seen": 3190292480 }, { "epoch": 0.97, "learning_rate": 3.346172363986519e-05, "loss": 0.0636, "theoretical_loss": 3.33364033160179, "tokens_seen": 3190554624 }, { "epoch": 0.97, "learning_rate": 3.3381479698282775e-05, "loss": 0.0649, "theoretical_loss": 3.3336197133848895, "tokens_seen": 3190816768 }, { "epoch": 0.97, "learning_rate": 3.330123575670037e-05, "loss": 0.0651, "theoretical_loss": 3.333599097336074, "tokens_seen": 3191078912 }, { "epoch": 0.97, "learning_rate": 3.322099181511796e-05, "loss": 0.0653, "theoretical_loss": 3.3335784834549367, "tokens_seen": 3191341056 }, { "epoch": 0.97, "objective/train/advantage_avg": -0.0003126697556581348, "objective/train/docs_used": 1159894, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2759596109390259, "objective/train/original_loss": 1.2759596109390259, "objective/train/theoretical_loss": 3.333557871741073, "objective/train/tokens_used": 3212063200, "objective/train/value_avg": -0.007526397705078125, "objective/train/value_loss": 0.0002957968390546739, "objective/train/value_max": -1.3649463653564453e-05, "objective/train/value_min": -0.6982421875, "objective/train/value_reward_corr": 0.7892284286544861, "objective/train/value_std": 0.0211029052734375, "objective/train/weight_avg": 0.9998196959495544, "objective/train/weighted_lm_loss": 1.2751232385635376, "objective/train/weights_max": 1.3489364385604858, "objective/train/weights_min": 0.3683748245239258, "theoretical_loss": 3.333557871741073, "tokens_seen": 3191603200 }, { "epoch": 0.97, "learning_rate": 3.3140747873535546e-05, "loss": 0.0649, "theoretical_loss": 3.333557871741073, "tokens_seen": 3191603200 }, { "epoch": 0.97, "learning_rate": 3.306050393195314e-05, "loss": 0.0629, "theoretical_loss": 3.3335372621940764, "tokens_seen": 3191865344 }, { "epoch": 0.97, "learning_rate": 3.298025999037073e-05, "loss": 0.0637, "theoretical_loss": 3.333516654813541, "tokens_seen": 3192127488 }, { "epoch": 0.97, "learning_rate": 3.2900016048788316e-05, "loss": 0.064, "theoretical_loss": 3.3334960495990615, "tokens_seen": 3192389632 }, { "epoch": 0.97, "learning_rate": 3.281977210720591e-05, "loss": 0.0638, "theoretical_loss": 3.3334754465502323, "tokens_seen": 3192651776 }, { "epoch": 0.97, "learning_rate": 3.27395281656235e-05, "loss": 0.0657, "theoretical_loss": 3.3334548456666475, "tokens_seen": 3192913920 }, { "epoch": 0.97, "learning_rate": 3.2659284224041086e-05, "loss": 0.0651, "theoretical_loss": 3.3334342469479026, "tokens_seen": 3193176064 }, { "epoch": 0.97, "learning_rate": 3.257904028245867e-05, "loss": 0.0647, "theoretical_loss": 3.3334136503935916, "tokens_seen": 3193438208 }, { "epoch": 0.97, "learning_rate": 3.2498796340876264e-05, "loss": 0.0659, "theoretical_loss": 3.3333930560033105, "tokens_seen": 3193700352 }, { "epoch": 0.97, "learning_rate": 3.241855239929385e-05, "loss": 0.0644, "theoretical_loss": 3.3333724637766533, "tokens_seen": 3193962496 }, { "epoch": 0.97, "learning_rate": 3.233830845771144e-05, "loss": 0.0683, "theoretical_loss": 3.333351873713216, "tokens_seen": 3194224640 }, { "epoch": 0.97, "learning_rate": 3.2258064516129034e-05, "loss": 0.0647, "theoretical_loss": 3.333331285812593, "tokens_seen": 3194486784 }, { "epoch": 0.97, "learning_rate": 3.217782057454662e-05, "loss": 0.0626, "theoretical_loss": 3.33331070007438, "tokens_seen": 3194748928 }, { "epoch": 0.97, "learning_rate": 3.209757663296421e-05, "loss": 0.0639, "theoretical_loss": 3.3332901164981728, "tokens_seen": 3195011072 }, { "epoch": 0.97, "learning_rate": 3.2017332691381804e-05, "loss": 0.0628, "theoretical_loss": 3.3332695350835664, "tokens_seen": 3195273216 }, { "epoch": 0.97, "learning_rate": 3.193708874979939e-05, "loss": 0.0646, "theoretical_loss": 3.333248955830157, "tokens_seen": 3195535360 }, { "epoch": 0.97, "learning_rate": 3.185684480821698e-05, "loss": 0.0646, "theoretical_loss": 3.33322837873754, "tokens_seen": 3195797504 }, { "epoch": 0.97, "learning_rate": 3.1776600866634574e-05, "loss": 0.0636, "theoretical_loss": 3.3332078038053115, "tokens_seen": 3196059648 }, { "epoch": 0.97, "learning_rate": 3.169635692505216e-05, "loss": 0.0635, "theoretical_loss": 3.3331872310330675, "tokens_seen": 3196321792 }, { "epoch": 0.97, "learning_rate": 3.1616112983469745e-05, "loss": 0.0618, "theoretical_loss": 3.333166660420404, "tokens_seen": 3196583936 }, { "epoch": 0.97, "learning_rate": 3.153586904188734e-05, "loss": 0.0635, "theoretical_loss": 3.333146091966918, "tokens_seen": 3196846080 }, { "epoch": 0.97, "learning_rate": 3.145562510030492e-05, "loss": 0.0634, "theoretical_loss": 3.3331255256722043, "tokens_seen": 3197108224 }, { "epoch": 0.97, "learning_rate": 3.1375381158722515e-05, "loss": 0.0633, "theoretical_loss": 3.3331049615358608, "tokens_seen": 3197370368 }, { "epoch": 0.97, "learning_rate": 3.129513721714011e-05, "loss": 0.065, "theoretical_loss": 3.333084399557483, "tokens_seen": 3197632512 }, { "epoch": 0.97, "learning_rate": 3.121489327555769e-05, "loss": 0.0626, "theoretical_loss": 3.3330638397366683, "tokens_seen": 3197894656 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.00034200731897726655, "objective/train/docs_used": 1162228, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.1946808099746704, "objective/train/original_loss": 1.1946806907653809, "objective/train/theoretical_loss": 3.3330432820730134, "objective/train/tokens_used": 3218616800, "objective/train/value_avg": -0.007663726806640625, "objective/train/value_loss": 0.00036682773497886956, "objective/train/value_max": -2.753734588623047e-05, "objective/train/value_min": -0.97705078125, "objective/train/value_reward_corr": 0.6712799686665404, "objective/train/value_std": 0.01715087890625, "objective/train/weight_avg": 1.000498652458191, "objective/train/weighted_lm_loss": 1.1946550607681274, "objective/train/weights_max": 1.920973539352417, "objective/train/weights_min": 0.2709263861179352, "theoretical_loss": 3.3330432820730134, "tokens_seen": 3198156800 }, { "epoch": 0.97, "learning_rate": 3.1134649333975285e-05, "loss": 0.0643, "theoretical_loss": 3.3330432820730134, "tokens_seen": 3198156800 }, { "epoch": 0.97, "learning_rate": 3.105440539239288e-05, "loss": 0.0651, "theoretical_loss": 3.3330227265661145, "tokens_seen": 3198418944 }, { "epoch": 0.97, "learning_rate": 3.097416145081046e-05, "loss": 0.0647, "theoretical_loss": 3.3330021732155695, "tokens_seen": 3198681088 }, { "epoch": 0.97, "learning_rate": 3.0893917509228055e-05, "loss": 0.0635, "theoretical_loss": 3.332981622020975, "tokens_seen": 3198943232 }, { "epoch": 0.97, "learning_rate": 3.081367356764564e-05, "loss": 0.0623, "theoretical_loss": 3.332961072981928, "tokens_seen": 3199205376 }, { "epoch": 0.97, "learning_rate": 3.073342962606323e-05, "loss": 0.0642, "theoretical_loss": 3.3329405260980267, "tokens_seen": 3199467520 }, { "epoch": 0.97, "learning_rate": 3.0653185684480825e-05, "loss": 0.0631, "theoretical_loss": 3.3329199813688675, "tokens_seen": 3199729664 }, { "epoch": 0.97, "learning_rate": 3.057294174289841e-05, "loss": 0.0653, "theoretical_loss": 3.3328994387940485, "tokens_seen": 3199991808 }, { "epoch": 0.97, "learning_rate": 3.0492697801316003e-05, "loss": 0.066, "theoretical_loss": 3.332878898373167, "tokens_seen": 3200253952 }, { "epoch": 0.97, "learning_rate": 3.041245385973359e-05, "loss": 0.0636, "theoretical_loss": 3.332858360105821, "tokens_seen": 3200516096 }, { "epoch": 0.97, "learning_rate": 3.033220991815118e-05, "loss": 0.0631, "theoretical_loss": 3.3328378239916083, "tokens_seen": 3200778240 }, { "epoch": 0.97, "learning_rate": 3.025196597656877e-05, "loss": 0.0648, "theoretical_loss": 3.3328172900301265, "tokens_seen": 3201040384 }, { "epoch": 0.97, "learning_rate": 3.017172203498636e-05, "loss": 0.0626, "theoretical_loss": 3.3327967582209745, "tokens_seen": 3201302528 }, { "epoch": 0.97, "learning_rate": 3.009147809340395e-05, "loss": 0.0637, "theoretical_loss": 3.33277622856375, "tokens_seen": 3201564672 }, { "epoch": 0.97, "learning_rate": 3.001123415182154e-05, "loss": 0.0636, "theoretical_loss": 3.332755701058051, "tokens_seen": 3201826816 }, { "epoch": 0.97, "learning_rate": 2.9930990210239125e-05, "loss": 0.0646, "theoretical_loss": 3.332735175703476, "tokens_seen": 3202088960 }, { "epoch": 0.97, "learning_rate": 2.9850746268656717e-05, "loss": 0.0637, "theoretical_loss": 3.3327146524996243, "tokens_seen": 3202351104 }, { "epoch": 0.97, "learning_rate": 2.9770502327074306e-05, "loss": 0.0628, "theoretical_loss": 3.332694131446093, "tokens_seen": 3202613248 }, { "epoch": 0.97, "learning_rate": 2.9690258385491895e-05, "loss": 0.0618, "theoretical_loss": 3.3326736125424827, "tokens_seen": 3202875392 }, { "epoch": 0.97, "learning_rate": 2.9610014443909487e-05, "loss": 0.0643, "theoretical_loss": 3.3326530957883906, "tokens_seen": 3203137536 }, { "epoch": 0.97, "learning_rate": 2.9529770502327073e-05, "loss": 0.0633, "theoretical_loss": 3.3326325811834163, "tokens_seen": 3203399680 }, { "epoch": 0.97, "learning_rate": 2.9449526560744662e-05, "loss": 0.064, "theoretical_loss": 3.332612068727159, "tokens_seen": 3203661824 }, { "epoch": 0.97, "learning_rate": 2.9369282619162254e-05, "loss": 0.0651, "theoretical_loss": 3.3325915584192174, "tokens_seen": 3203923968 }, { "epoch": 0.97, "learning_rate": 2.9289038677579843e-05, "loss": 0.0637, "theoretical_loss": 3.332571050259191, "tokens_seen": 3204186112 }, { "epoch": 0.97, "learning_rate": 2.9208794735997432e-05, "loss": 0.0596, "theoretical_loss": 3.3325505442466796, "tokens_seen": 3204448256 }, { "epoch": 0.97, "objective/train/advantage_avg": -0.00047033251030370593, "objective/train/docs_used": 1164453, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.324925184249878, "objective/train/original_loss": 1.3249250650405884, "objective/train/theoretical_loss": 3.3325300403812816, "objective/train/tokens_used": 3225170400, "objective/train/value_avg": -0.006519317626953125, "objective/train/value_loss": 0.00018281102529726923, "objective/train/value_max": -2.7120113372802734e-05, "objective/train/value_min": -0.314208984375, "objective/train/value_reward_corr": 0.8121445704998986, "objective/train/value_std": 0.01412200927734375, "objective/train/weight_avg": 0.9996156096458435, "objective/train/weighted_lm_loss": 1.3245081901550293, "objective/train/weights_max": 1.179583191871643, "objective/train/weights_min": 0.3681163489818573, "theoretical_loss": 3.3325300403812816, "tokens_seen": 3204710400 }, { "epoch": 0.97, "learning_rate": 2.9128550794415024e-05, "loss": 0.0657, "theoretical_loss": 3.3325300403812816, "tokens_seen": 3204710400 }, { "epoch": 0.97, "learning_rate": 2.904830685283261e-05, "loss": 0.0618, "theoretical_loss": 3.332509538662598, "tokens_seen": 3204972544 }, { "epoch": 0.97, "learning_rate": 2.89680629112502e-05, "loss": 0.0601, "theoretical_loss": 3.332489039090227, "tokens_seen": 3205234688 }, { "epoch": 0.97, "learning_rate": 2.888781896966779e-05, "loss": 0.062, "theoretical_loss": 3.3324685416637694, "tokens_seen": 3205496832 }, { "epoch": 0.97, "learning_rate": 2.880757502808538e-05, "loss": 0.0633, "theoretical_loss": 3.3324480463828245, "tokens_seen": 3205758976 }, { "epoch": 0.97, "learning_rate": 2.872733108650297e-05, "loss": 0.0649, "theoretical_loss": 3.3324275532469927, "tokens_seen": 3206021120 }, { "epoch": 0.97, "learning_rate": 2.864708714492056e-05, "loss": 0.0629, "theoretical_loss": 3.332407062255874, "tokens_seen": 3206283264 }, { "epoch": 0.97, "learning_rate": 2.8566843203338146e-05, "loss": 0.065, "theoretical_loss": 3.3323865734090687, "tokens_seen": 3206545408 }, { "epoch": 0.97, "learning_rate": 2.8486599261755735e-05, "loss": 0.0625, "theoretical_loss": 3.332366086706177, "tokens_seen": 3206807552 }, { "epoch": 0.97, "learning_rate": 2.8406355320173328e-05, "loss": 0.0653, "theoretical_loss": 3.3323456021467996, "tokens_seen": 3207069696 }, { "epoch": 0.97, "learning_rate": 2.8326111378590917e-05, "loss": 0.0622, "theoretical_loss": 3.3323251197305366, "tokens_seen": 3207331840 }, { "epoch": 0.97, "learning_rate": 2.824586743700851e-05, "loss": 0.0645, "theoretical_loss": 3.3323046394569893, "tokens_seen": 3207593984 }, { "epoch": 0.97, "learning_rate": 2.8165623495426098e-05, "loss": 0.0635, "theoretical_loss": 3.3322841613257577, "tokens_seen": 3207856128 }, { "epoch": 0.97, "learning_rate": 2.8085379553843683e-05, "loss": 0.0629, "theoretical_loss": 3.332263685336443, "tokens_seen": 3208118272 }, { "epoch": 0.97, "learning_rate": 2.8005135612261276e-05, "loss": 0.0659, "theoretical_loss": 3.3322432114886458, "tokens_seen": 3208380416 }, { "epoch": 0.97, "learning_rate": 2.7924891670678864e-05, "loss": 0.0627, "theoretical_loss": 3.332222739781968, "tokens_seen": 3208642560 }, { "epoch": 0.97, "learning_rate": 2.7844647729096453e-05, "loss": 0.0638, "theoretical_loss": 3.33220227021601, "tokens_seen": 3208904704 }, { "epoch": 0.97, "learning_rate": 2.7764403787514046e-05, "loss": 0.0641, "theoretical_loss": 3.332181802790374, "tokens_seen": 3209166848 }, { "epoch": 0.97, "learning_rate": 2.7684159845931635e-05, "loss": 0.0634, "theoretical_loss": 3.3321613375046604, "tokens_seen": 3209428992 }, { "epoch": 0.97, "learning_rate": 2.760391590434922e-05, "loss": 0.063, "theoretical_loss": 3.3321408743584713, "tokens_seen": 3209691136 }, { "epoch": 0.97, "learning_rate": 2.7523671962766812e-05, "loss": 0.0641, "theoretical_loss": 3.332120413351408, "tokens_seen": 3209953280 }, { "epoch": 0.97, "learning_rate": 2.74434280211844e-05, "loss": 0.0646, "theoretical_loss": 3.3320999544830725, "tokens_seen": 3210215424 }, { "epoch": 0.97, "learning_rate": 2.736318407960199e-05, "loss": 0.0639, "theoretical_loss": 3.3320794977530666, "tokens_seen": 3210477568 }, { "epoch": 0.97, "learning_rate": 2.7282940138019582e-05, "loss": 0.0615, "theoretical_loss": 3.332059043160992, "tokens_seen": 3210739712 }, { "epoch": 0.97, "learning_rate": 2.720269619643717e-05, "loss": 0.0626, "theoretical_loss": 3.3320385907064507, "tokens_seen": 3211001856 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.001090014586225152, "objective/train/docs_used": 1166890, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3051739931106567, "objective/train/original_loss": 1.3051739931106567, "objective/train/theoretical_loss": 3.3320181403890454, "objective/train/tokens_used": 3231724000, "objective/train/value_avg": -0.0057220458984375, "objective/train/value_loss": 0.00018149317475035787, "objective/train/value_max": -2.1636486053466797e-05, "objective/train/value_min": -0.97509765625, "objective/train/value_reward_corr": 0.5922709854946266, "objective/train/value_std": 0.01177215576171875, "objective/train/weight_avg": 1.0011723041534424, "objective/train/weighted_lm_loss": 1.306033730506897, "objective/train/weights_max": 1.4535712003707886, "objective/train/weights_min": 0.22798629105091095, "theoretical_loss": 3.3320181403890454, "tokens_seen": 3211264000 }, { "epoch": 0.97, "learning_rate": 2.7122452254854757e-05, "loss": 0.0626, "theoretical_loss": 3.3320181403890454, "tokens_seen": 3211264000 }, { "epoch": 0.97, "learning_rate": 2.704220831327235e-05, "loss": 0.0654, "theoretical_loss": 3.3319976922083776, "tokens_seen": 3211526144 }, { "epoch": 0.97, "learning_rate": 2.6961964371689938e-05, "loss": 0.0605, "theoretical_loss": 3.33197724616405, "tokens_seen": 3211788288 }, { "epoch": 0.97, "learning_rate": 2.6881720430107527e-05, "loss": 0.0665, "theoretical_loss": 3.3319568022556654, "tokens_seen": 3212050432 }, { "epoch": 0.97, "learning_rate": 2.680147648852512e-05, "loss": 0.0662, "theoretical_loss": 3.331936360482826, "tokens_seen": 3212312576 }, { "epoch": 0.97, "learning_rate": 2.6721232546942708e-05, "loss": 0.0634, "theoretical_loss": 3.3319159208451343, "tokens_seen": 3212574720 }, { "epoch": 0.97, "learning_rate": 2.6640988605360294e-05, "loss": 0.066, "theoretical_loss": 3.3318954833421937, "tokens_seen": 3212836864 }, { "epoch": 0.97, "learning_rate": 2.6560744663777886e-05, "loss": 0.0626, "theoretical_loss": 3.3318750479736066, "tokens_seen": 3213099008 }, { "epoch": 0.97, "learning_rate": 2.6480500722195475e-05, "loss": 0.0622, "theoretical_loss": 3.3318546147389756, "tokens_seen": 3213361152 }, { "epoch": 0.97, "learning_rate": 2.6400256780613064e-05, "loss": 0.0634, "theoretical_loss": 3.3318341836379046, "tokens_seen": 3213623296 }, { "epoch": 0.97, "learning_rate": 2.6320012839030656e-05, "loss": 0.0646, "theoretical_loss": 3.3318137546699966, "tokens_seen": 3213885440 }, { "epoch": 0.97, "learning_rate": 2.6239768897448245e-05, "loss": 0.0625, "theoretical_loss": 3.3317933278348546, "tokens_seen": 3214147584 }, { "epoch": 0.97, "learning_rate": 2.615952495586583e-05, "loss": 0.062, "theoretical_loss": 3.331772903132083, "tokens_seen": 3214409728 }, { "epoch": 0.97, "learning_rate": 2.6079281014283423e-05, "loss": 0.0607, "theoretical_loss": 3.331752480561284, "tokens_seen": 3214671872 }, { "epoch": 0.97, "learning_rate": 2.599903707270101e-05, "loss": 0.0625, "theoretical_loss": 3.3317320601220617, "tokens_seen": 3214934016 }, { "epoch": 0.97, "learning_rate": 2.59187931311186e-05, "loss": 0.065, "theoretical_loss": 3.33171164181402, "tokens_seen": 3215196160 }, { "epoch": 0.97, "learning_rate": 2.5838549189536193e-05, "loss": 0.0628, "theoretical_loss": 3.331691225636763, "tokens_seen": 3215458304 }, { "epoch": 0.97, "learning_rate": 2.575830524795378e-05, "loss": 0.0643, "theoretical_loss": 3.331670811589894, "tokens_seen": 3215720448 }, { "epoch": 0.97, "learning_rate": 2.5678061306371367e-05, "loss": 0.0622, "theoretical_loss": 3.3316503996730176, "tokens_seen": 3215982592 }, { "epoch": 0.97, "learning_rate": 2.559781736478896e-05, "loss": 0.0642, "theoretical_loss": 3.3316299898857373, "tokens_seen": 3216244736 }, { "epoch": 0.97, "learning_rate": 2.5517573423206548e-05, "loss": 0.0632, "theoretical_loss": 3.3316095822276584, "tokens_seen": 3216506880 }, { "epoch": 0.97, "learning_rate": 2.5437329481624137e-05, "loss": 0.064, "theoretical_loss": 3.3315891766983845, "tokens_seen": 3216769024 }, { "epoch": 0.97, "learning_rate": 2.535708554004173e-05, "loss": 0.0651, "theoretical_loss": 3.3315687732975205, "tokens_seen": 3217031168 }, { "epoch": 0.97, "learning_rate": 2.5276841598459315e-05, "loss": 0.0616, "theoretical_loss": 3.3315483720246704, "tokens_seen": 3217293312 }, { "epoch": 0.98, "learning_rate": 2.5196597656876904e-05, "loss": 0.0657, "theoretical_loss": 3.3315279728794396, "tokens_seen": 3217555456 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.00010754424874903634, "objective/train/docs_used": 1169144, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.370460867881775, "objective/train/original_loss": 1.3704607486724854, "objective/train/theoretical_loss": 3.3315075758614325, "objective/train/tokens_used": 3238277600, "objective/train/value_avg": -0.007785797119140625, "objective/train/value_loss": 0.0004237206594552845, "objective/train/value_max": -1.8477439880371094e-05, "objective/train/value_min": -0.67578125, "objective/train/value_reward_corr": 0.6450809449913903, "objective/train/value_std": 0.015899658203125, "objective/train/weight_avg": 1.0002824068069458, "objective/train/weighted_lm_loss": 1.3699983358383179, "objective/train/weights_max": 1.5724527835845947, "objective/train/weights_min": 0.2259981781244278, "theoretical_loss": 3.3315075758614325, "tokens_seen": 3217817600 }, { "epoch": 0.98, "learning_rate": 2.5116353715294496e-05, "loss": 0.0674, "theoretical_loss": 3.3315075758614325, "tokens_seen": 3217817600 }, { "epoch": 0.98, "learning_rate": 2.5036109773712085e-05, "loss": 0.0652, "theoretical_loss": 3.331487180970254, "tokens_seen": 3218079744 }, { "epoch": 0.98, "learning_rate": 2.4955865832129674e-05, "loss": 0.0657, "theoretical_loss": 3.331466788205509, "tokens_seen": 3218341888 }, { "epoch": 0.98, "learning_rate": 2.4875621890547266e-05, "loss": 0.0608, "theoretical_loss": 3.331446397566803, "tokens_seen": 3218604032 }, { "epoch": 0.98, "learning_rate": 2.4795377948964852e-05, "loss": 0.0646, "theoretical_loss": 3.3314260090537413, "tokens_seen": 3218866176 }, { "epoch": 0.98, "learning_rate": 2.471513400738244e-05, "loss": 0.0617, "theoretical_loss": 3.3314056226659283, "tokens_seen": 3219128320 }, { "epoch": 0.98, "learning_rate": 2.4634890065800033e-05, "loss": 0.0648, "theoretical_loss": 3.3313852384029707, "tokens_seen": 3219390464 }, { "epoch": 0.98, "learning_rate": 2.4554646124217622e-05, "loss": 0.0627, "theoretical_loss": 3.331364856264473, "tokens_seen": 3219652608 }, { "epoch": 0.98, "learning_rate": 2.447440218263521e-05, "loss": 0.0652, "theoretical_loss": 3.3313444762500413, "tokens_seen": 3219914752 }, { "epoch": 0.98, "learning_rate": 2.4394158241052803e-05, "loss": 0.0643, "theoretical_loss": 3.331324098359281, "tokens_seen": 3220176896 }, { "epoch": 0.98, "learning_rate": 2.431391429947039e-05, "loss": 0.0645, "theoretical_loss": 3.331303722591799, "tokens_seen": 3220439040 }, { "epoch": 0.98, "learning_rate": 2.4233670357887977e-05, "loss": 0.0663, "theoretical_loss": 3.3312833489472, "tokens_seen": 3220701184 }, { "epoch": 0.98, "learning_rate": 2.415342641630557e-05, "loss": 0.065, "theoretical_loss": 3.331262977425091, "tokens_seen": 3220963328 }, { "epoch": 0.98, "learning_rate": 2.407318247472316e-05, "loss": 0.0635, "theoretical_loss": 3.331242608025077, "tokens_seen": 3221225472 }, { "epoch": 0.98, "learning_rate": 2.399293853314075e-05, "loss": 0.064, "theoretical_loss": 3.331222240746765, "tokens_seen": 3221487616 }, { "epoch": 0.98, "learning_rate": 2.391269459155834e-05, "loss": 0.0651, "theoretical_loss": 3.331201875589762, "tokens_seen": 3221749760 }, { "epoch": 0.98, "learning_rate": 2.3832450649975925e-05, "loss": 0.0613, "theoretical_loss": 3.3311815125536737, "tokens_seen": 3222011904 }, { "epoch": 0.98, "learning_rate": 2.3752206708393518e-05, "loss": 0.0632, "theoretical_loss": 3.3311611516381063, "tokens_seen": 3222274048 }, { "epoch": 0.98, "learning_rate": 2.3671962766811106e-05, "loss": 0.0638, "theoretical_loss": 3.3311407928426675, "tokens_seen": 3222536192 }, { "epoch": 0.98, "learning_rate": 2.3591718825228695e-05, "loss": 0.0637, "theoretical_loss": 3.3311204361669637, "tokens_seen": 3222798336 }, { "epoch": 0.98, "learning_rate": 2.3511474883646288e-05, "loss": 0.0648, "theoretical_loss": 3.331100081610602, "tokens_seen": 3223060480 }, { "epoch": 0.98, "learning_rate": 2.3431230942063876e-05, "loss": 0.0636, "theoretical_loss": 3.3310797291731884, "tokens_seen": 3223322624 }, { "epoch": 0.98, "learning_rate": 2.3350987000481462e-05, "loss": 0.0662, "theoretical_loss": 3.3310593788543312, "tokens_seen": 3223584768 }, { "epoch": 0.98, "learning_rate": 2.3270743058899054e-05, "loss": 0.0618, "theoretical_loss": 3.331039030653637, "tokens_seen": 3223846912 }, { "epoch": 0.98, "learning_rate": 2.3190499117316643e-05, "loss": 0.0652, "theoretical_loss": 3.3310186845707137, "tokens_seen": 3224109056 }, { "epoch": 0.98, "objective/train/advantage_avg": 7.147617725422606e-05, "objective/train/docs_used": 1171684, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3322689533233643, "objective/train/original_loss": 1.3322687149047852, "objective/train/theoretical_loss": 3.330998340605168, "objective/train/tokens_used": 3244831200, "objective/train/value_avg": -0.0081939697265625, "objective/train/value_loss": 0.0003085145144723356, "objective/train/value_max": -2.1755695343017578e-05, "objective/train/value_min": -0.78173828125, "objective/train/value_reward_corr": 0.7559338916277085, "objective/train/value_std": 0.0196380615234375, "objective/train/weight_avg": 1.0002161264419556, "objective/train/weighted_lm_loss": 1.3316999673843384, "objective/train/weights_max": 1.965567946434021, "objective/train/weights_min": 0.3757654130458832, "theoretical_loss": 3.330998340605168, "tokens_seen": 3224371200 }, { "epoch": 0.98, "learning_rate": 2.3110255175734232e-05, "loss": 0.0658, "theoretical_loss": 3.330998340605168, "tokens_seen": 3224371200 }, { "epoch": 0.98, "learning_rate": 2.3030011234151824e-05, "loss": 0.0654, "theoretical_loss": 3.3309779987566075, "tokens_seen": 3224633344 }, { "epoch": 0.98, "learning_rate": 2.2949767292569413e-05, "loss": 0.0633, "theoretical_loss": 3.330957659024641, "tokens_seen": 3224895488 }, { "epoch": 0.98, "learning_rate": 2.2869523350987e-05, "loss": 0.0625, "theoretical_loss": 3.330937321408874, "tokens_seen": 3225157632 }, { "epoch": 0.98, "learning_rate": 2.278927940940459e-05, "loss": 0.0639, "theoretical_loss": 3.3309169859089165, "tokens_seen": 3225419776 }, { "epoch": 0.98, "learning_rate": 2.270903546782218e-05, "loss": 0.0646, "theoretical_loss": 3.3308966525243755, "tokens_seen": 3225681920 }, { "epoch": 0.98, "learning_rate": 2.262879152623977e-05, "loss": 0.0659, "theoretical_loss": 3.330876321254859, "tokens_seen": 3225944064 }, { "epoch": 0.98, "learning_rate": 2.254854758465736e-05, "loss": 0.0625, "theoretical_loss": 3.3308559920999756, "tokens_seen": 3226206208 }, { "epoch": 0.98, "learning_rate": 2.246830364307495e-05, "loss": 0.0643, "theoretical_loss": 3.3308356650593334, "tokens_seen": 3226468352 }, { "epoch": 0.98, "learning_rate": 2.2388059701492536e-05, "loss": 0.0649, "theoretical_loss": 3.33081534013254, "tokens_seen": 3226730496 }, { "epoch": 0.98, "learning_rate": 2.2307815759910128e-05, "loss": 0.0618, "theoretical_loss": 3.3307950173192054, "tokens_seen": 3226992640 }, { "epoch": 0.98, "learning_rate": 2.2227571818327717e-05, "loss": 0.0635, "theoretical_loss": 3.3307746966189367, "tokens_seen": 3227254784 }, { "epoch": 0.98, "learning_rate": 2.2147327876745306e-05, "loss": 0.0608, "theoretical_loss": 3.3307543780313433, "tokens_seen": 3227516928 }, { "epoch": 0.98, "learning_rate": 2.2067083935162898e-05, "loss": 0.0662, "theoretical_loss": 3.330734061556034, "tokens_seen": 3227779072 }, { "epoch": 0.98, "learning_rate": 2.1986839993580487e-05, "loss": 0.0648, "theoretical_loss": 3.3307137471926174, "tokens_seen": 3228041216 }, { "epoch": 0.98, "learning_rate": 2.1906596051998072e-05, "loss": 0.0652, "theoretical_loss": 3.330693434940703, "tokens_seen": 3228303360 }, { "epoch": 0.98, "learning_rate": 2.1826352110415665e-05, "loss": 0.0666, "theoretical_loss": 3.3306731247998993, "tokens_seen": 3228565504 }, { "epoch": 0.98, "learning_rate": 2.1746108168833253e-05, "loss": 0.0633, "theoretical_loss": 3.330652816769816, "tokens_seen": 3228827648 }, { "epoch": 0.98, "learning_rate": 2.1665864227250842e-05, "loss": 0.0669, "theoretical_loss": 3.330632510850062, "tokens_seen": 3229089792 }, { "epoch": 0.98, "learning_rate": 2.1585620285668435e-05, "loss": 0.0644, "theoretical_loss": 3.3306122070402466, "tokens_seen": 3229351936 }, { "epoch": 0.98, "learning_rate": 2.1505376344086024e-05, "loss": 0.0634, "theoretical_loss": 3.33059190533998, "tokens_seen": 3229614080 }, { "epoch": 0.98, "learning_rate": 2.142513240250361e-05, "loss": 0.0646, "theoretical_loss": 3.3305716057488706, "tokens_seen": 3229876224 }, { "epoch": 0.98, "learning_rate": 2.13448884609212e-05, "loss": 0.0627, "theoretical_loss": 3.33055130826653, "tokens_seen": 3230138368 }, { "epoch": 0.98, "learning_rate": 2.126464451933879e-05, "loss": 0.0615, "theoretical_loss": 3.3305310128925663, "tokens_seen": 3230400512 }, { "epoch": 0.98, "learning_rate": 2.118440057775638e-05, "loss": 0.0653, "theoretical_loss": 3.33051071962659, "tokens_seen": 3230662656 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.0006097581936046481, "objective/train/docs_used": 1174146, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.445149302482605, "objective/train/original_loss": 1.4451491832733154, "objective/train/theoretical_loss": 3.3304904284682113, "objective/train/tokens_used": 3251384800, "objective/train/value_avg": -0.006221771240234375, "objective/train/value_loss": 0.00014050577010493726, "objective/train/value_max": -1.9252300262451172e-05, "objective/train/value_min": -0.70556640625, "objective/train/value_reward_corr": 0.7495420834654312, "objective/train/value_std": 0.01364898681640625, "objective/train/weight_avg": 1.000675082206726, "objective/train/weighted_lm_loss": 1.4458887577056885, "objective/train/weights_max": 1.349595308303833, "objective/train/weights_min": 0.36818361282348633, "theoretical_loss": 3.3304904284682113, "tokens_seen": 3230924800 }, { "epoch": 0.98, "learning_rate": 2.110415663617397e-05, "loss": 0.0668, "theoretical_loss": 3.3304904284682113, "tokens_seen": 3230924800 }, { "epoch": 0.98, "learning_rate": 2.1023912694591557e-05, "loss": 0.0652, "theoretical_loss": 3.3304701394170406, "tokens_seen": 3231186944 }, { "epoch": 0.98, "learning_rate": 2.0943668753009146e-05, "loss": 0.064, "theoretical_loss": 3.3304498524726873, "tokens_seen": 3231449088 }, { "epoch": 0.98, "learning_rate": 2.0863424811426738e-05, "loss": 0.0639, "theoretical_loss": 3.3304295676347624, "tokens_seen": 3231711232 }, { "epoch": 0.98, "learning_rate": 2.0783180869844327e-05, "loss": 0.0643, "theoretical_loss": 3.3304092849028764, "tokens_seen": 3231973376 }, { "epoch": 0.98, "learning_rate": 2.0702936928261916e-05, "loss": 0.0676, "theoretical_loss": 3.3303890042766393, "tokens_seen": 3232235520 }, { "epoch": 0.98, "learning_rate": 2.0622692986679508e-05, "loss": 0.0656, "theoretical_loss": 3.3303687257556622, "tokens_seen": 3232497664 }, { "epoch": 0.98, "learning_rate": 2.0542449045097094e-05, "loss": 0.0628, "theoretical_loss": 3.3303484493395556, "tokens_seen": 3232759808 }, { "epoch": 0.98, "learning_rate": 2.0462205103514683e-05, "loss": 0.0637, "theoretical_loss": 3.3303281750279305, "tokens_seen": 3233021952 }, { "epoch": 0.98, "learning_rate": 2.0381961161932275e-05, "loss": 0.0626, "theoretical_loss": 3.3303079028203983, "tokens_seen": 3233284096 }, { "epoch": 0.98, "learning_rate": 2.0301717220349864e-05, "loss": 0.0655, "theoretical_loss": 3.330287632716569, "tokens_seen": 3233546240 }, { "epoch": 0.98, "learning_rate": 2.0221473278767453e-05, "loss": 0.0643, "theoretical_loss": 3.330267364716055, "tokens_seen": 3233808384 }, { "epoch": 0.98, "learning_rate": 2.0141229337185045e-05, "loss": 0.0652, "theoretical_loss": 3.3302470988184667, "tokens_seen": 3234070528 }, { "epoch": 0.98, "learning_rate": 2.006098539560263e-05, "loss": 0.0634, "theoretical_loss": 3.3302268350234154, "tokens_seen": 3234332672 }, { "epoch": 0.98, "learning_rate": 1.998074145402022e-05, "loss": 0.0644, "theoretical_loss": 3.3302065733305133, "tokens_seen": 3234594816 }, { "epoch": 0.98, "learning_rate": 1.990049751243781e-05, "loss": 0.0652, "theoretical_loss": 3.3301863137393717, "tokens_seen": 3234856960 }, { "epoch": 0.98, "learning_rate": 1.98202535708554e-05, "loss": 0.0619, "theoretical_loss": 3.330166056249602, "tokens_seen": 3235119104 }, { "epoch": 0.98, "learning_rate": 1.9740009629272993e-05, "loss": 0.0659, "theoretical_loss": 3.3301458008608162, "tokens_seen": 3235381248 }, { "epoch": 0.98, "learning_rate": 1.965976568769058e-05, "loss": 0.0637, "theoretical_loss": 3.3301255475726266, "tokens_seen": 3235643392 }, { "epoch": 0.98, "learning_rate": 1.9579521746108167e-05, "loss": 0.0642, "theoretical_loss": 3.3301052963846445, "tokens_seen": 3235905536 }, { "epoch": 0.98, "learning_rate": 1.949927780452576e-05, "loss": 0.0654, "theoretical_loss": 3.3300850472964822, "tokens_seen": 3236167680 }, { "epoch": 0.98, "learning_rate": 1.941903386294335e-05, "loss": 0.0619, "theoretical_loss": 3.3300648003077518, "tokens_seen": 3236429824 }, { "epoch": 0.98, "learning_rate": 1.9338789921360937e-05, "loss": 0.0651, "theoretical_loss": 3.330044555418066, "tokens_seen": 3236691968 }, { "epoch": 0.98, "learning_rate": 1.925854597977853e-05, "loss": 0.0652, "theoretical_loss": 3.330024312627037, "tokens_seen": 3236954112 }, { "epoch": 0.98, "learning_rate": 1.917830203819612e-05, "loss": 0.0643, "theoretical_loss": 3.330004071934278, "tokens_seen": 3237216256 }, { "epoch": 0.98, "objective/train/advantage_avg": -0.0003025984624400735, "objective/train/docs_used": 1176230, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.234129548072815, "objective/train/original_loss": 1.2341294288635254, "objective/train/theoretical_loss": 3.3299838333394, "objective/train/tokens_used": 3257938400, "objective/train/value_avg": -0.0086822509765625, "objective/train/value_loss": 0.0002719620242714882, "objective/train/value_max": -1.9371509552001953e-05, "objective/train/value_min": -0.94677734375, "objective/train/value_reward_corr": 0.8143104825805222, "objective/train/value_std": 0.0205230712890625, "objective/train/weight_avg": 0.9998201727867126, "objective/train/weighted_lm_loss": 1.2336772680282593, "objective/train/weights_max": 1.4542574882507324, "objective/train/weights_min": 0.23121991753578186, "theoretical_loss": 3.3299838333394, "tokens_seen": 3237478400 }, { "epoch": 0.98, "learning_rate": 1.9098058096613704e-05, "loss": 0.0632, "theoretical_loss": 3.3299838333394, "tokens_seen": 3237478400 }, { "epoch": 0.98, "learning_rate": 1.9017814155031296e-05, "loss": 0.0666, "theoretical_loss": 3.329963596842017, "tokens_seen": 3237740544 }, { "epoch": 0.98, "learning_rate": 1.8937570213448885e-05, "loss": 0.0629, "theoretical_loss": 3.3299433624417416, "tokens_seen": 3238002688 }, { "epoch": 0.98, "learning_rate": 1.8857326271866474e-05, "loss": 0.0609, "theoretical_loss": 3.3299231301381864, "tokens_seen": 3238264832 }, { "epoch": 0.98, "learning_rate": 1.8777082330284066e-05, "loss": 0.0617, "theoretical_loss": 3.3299028999309646, "tokens_seen": 3238526976 }, { "epoch": 0.98, "learning_rate": 1.8696838388701655e-05, "loss": 0.0624, "theoretical_loss": 3.3298826718196897, "tokens_seen": 3238789120 }, { "epoch": 0.98, "learning_rate": 1.861659444711924e-05, "loss": 0.064, "theoretical_loss": 3.3298624458039745, "tokens_seen": 3239051264 }, { "epoch": 0.98, "learning_rate": 1.8536350505536833e-05, "loss": 0.0624, "theoretical_loss": 3.3298422218834323, "tokens_seen": 3239313408 }, { "epoch": 0.98, "learning_rate": 1.8456106563954422e-05, "loss": 0.0644, "theoretical_loss": 3.3298220000576766, "tokens_seen": 3239575552 }, { "epoch": 0.98, "learning_rate": 1.837586262237201e-05, "loss": 0.0637, "theoretical_loss": 3.329801780326321, "tokens_seen": 3239837696 }, { "epoch": 0.98, "learning_rate": 1.8295618680789603e-05, "loss": 0.0625, "theoretical_loss": 3.3297815626889795, "tokens_seen": 3240099840 }, { "epoch": 0.98, "learning_rate": 1.8215374739207192e-05, "loss": 0.064, "theoretical_loss": 3.329761347145265, "tokens_seen": 3240361984 }, { "epoch": 0.98, "learning_rate": 1.8135130797624777e-05, "loss": 0.064, "theoretical_loss": 3.3297411336947924, "tokens_seen": 3240624128 }, { "epoch": 0.98, "learning_rate": 1.805488685604237e-05, "loss": 0.0641, "theoretical_loss": 3.3297209223371746, "tokens_seen": 3240886272 }, { "epoch": 0.98, "learning_rate": 1.797464291445996e-05, "loss": 0.0652, "theoretical_loss": 3.329700713072026, "tokens_seen": 3241148416 }, { "epoch": 0.98, "learning_rate": 1.7894398972877548e-05, "loss": 0.0662, "theoretical_loss": 3.3296805058989616, "tokens_seen": 3241410560 }, { "epoch": 0.98, "learning_rate": 1.781415503129514e-05, "loss": 0.0665, "theoretical_loss": 3.3296603008175945, "tokens_seen": 3241672704 }, { "epoch": 0.98, "learning_rate": 1.773391108971273e-05, "loss": 0.0627, "theoretical_loss": 3.32964009782754, "tokens_seen": 3241934848 }, { "epoch": 0.98, "learning_rate": 1.7653667148130314e-05, "loss": 0.0644, "theoretical_loss": 3.3296198969284116, "tokens_seen": 3242196992 }, { "epoch": 0.98, "learning_rate": 1.7573423206547907e-05, "loss": 0.0625, "theoretical_loss": 3.3295996981198246, "tokens_seen": 3242459136 }, { "epoch": 0.98, "learning_rate": 1.7493179264965495e-05, "loss": 0.0645, "theoretical_loss": 3.329579501401393, "tokens_seen": 3242721280 }, { "epoch": 0.98, "learning_rate": 1.7412935323383084e-05, "loss": 0.0627, "theoretical_loss": 3.3295593067727323, "tokens_seen": 3242983424 }, { "epoch": 0.98, "learning_rate": 1.7332691381800677e-05, "loss": 0.0627, "theoretical_loss": 3.329539114233457, "tokens_seen": 3243245568 }, { "epoch": 0.98, "learning_rate": 1.7252447440218265e-05, "loss": 0.0635, "theoretical_loss": 3.3295189237831826, "tokens_seen": 3243507712 }, { "epoch": 0.98, "learning_rate": 1.717220349863585e-05, "loss": 0.0628, "theoretical_loss": 3.329498735421523, "tokens_seen": 3243769856 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.0003678802167996764, "objective/train/docs_used": 1178511, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2842475175857544, "objective/train/original_loss": 1.2842473983764648, "objective/train/theoretical_loss": 3.3294785491480945, "objective/train/tokens_used": 3264492000, "objective/train/value_avg": -0.00807952880859375, "objective/train/value_loss": 0.0004038362530991435, "objective/train/value_max": -2.6285648345947266e-05, "objective/train/value_min": -0.525390625, "objective/train/value_reward_corr": 0.6182115186902631, "objective/train/value_std": 0.0156707763671875, "objective/train/weight_avg": 1.0005180835723877, "objective/train/weighted_lm_loss": 1.284083366394043, "objective/train/weights_max": 1.6044148206710815, "objective/train/weights_min": 0.053332287818193436, "theoretical_loss": 3.3294785491480945, "tokens_seen": 3244032000 }, { "epoch": 0.98, "learning_rate": 1.7091959557053443e-05, "loss": 0.0644, "theoretical_loss": 3.3294785491480945, "tokens_seen": 3244032000 }, { "epoch": 0.98, "learning_rate": 1.7011715615471032e-05, "loss": 0.0638, "theoretical_loss": 3.3294583649625116, "tokens_seen": 3244294144 }, { "epoch": 0.98, "learning_rate": 1.693147167388862e-05, "loss": 0.0637, "theoretical_loss": 3.3294381828643904, "tokens_seen": 3244556288 }, { "epoch": 0.98, "learning_rate": 1.6851227732306213e-05, "loss": 0.063, "theoretical_loss": 3.329418002853346, "tokens_seen": 3244818432 }, { "epoch": 0.98, "learning_rate": 1.67709837907238e-05, "loss": 0.0614, "theoretical_loss": 3.3293978249289937, "tokens_seen": 3245080576 }, { "epoch": 0.98, "learning_rate": 1.6690739849141388e-05, "loss": 0.064, "theoretical_loss": 3.32937764909095, "tokens_seen": 3245342720 }, { "epoch": 0.98, "learning_rate": 1.661049590755898e-05, "loss": 0.0639, "theoretical_loss": 3.32935747533883, "tokens_seen": 3245604864 }, { "epoch": 0.98, "learning_rate": 1.653025196597657e-05, "loss": 0.0629, "theoretical_loss": 3.32933730367225, "tokens_seen": 3245867008 }, { "epoch": 0.98, "learning_rate": 1.6450008024394158e-05, "loss": 0.065, "theoretical_loss": 3.329317134090825, "tokens_seen": 3246129152 }, { "epoch": 0.98, "learning_rate": 1.636976408281175e-05, "loss": 0.0655, "theoretical_loss": 3.3292969665941725, "tokens_seen": 3246391296 }, { "epoch": 0.98, "learning_rate": 1.6289520141229336e-05, "loss": 0.0638, "theoretical_loss": 3.3292768011819076, "tokens_seen": 3246653440 }, { "epoch": 0.98, "learning_rate": 1.6209276199646925e-05, "loss": 0.0641, "theoretical_loss": 3.3292566378536472, "tokens_seen": 3246915584 }, { "epoch": 0.98, "learning_rate": 1.6129032258064517e-05, "loss": 0.0652, "theoretical_loss": 3.329236476609008, "tokens_seen": 3247177728 }, { "epoch": 0.98, "learning_rate": 1.6048788316482106e-05, "loss": 0.0652, "theoretical_loss": 3.3292163174476057, "tokens_seen": 3247439872 }, { "epoch": 0.98, "learning_rate": 1.5968544374899695e-05, "loss": 0.0656, "theoretical_loss": 3.329196160369057, "tokens_seen": 3247702016 }, { "epoch": 0.98, "learning_rate": 1.5888300433317287e-05, "loss": 0.0654, "theoretical_loss": 3.3291760053729798, "tokens_seen": 3247964160 }, { "epoch": 0.98, "learning_rate": 1.5808056491734872e-05, "loss": 0.0648, "theoretical_loss": 3.329155852458989, "tokens_seen": 3248226304 }, { "epoch": 0.98, "learning_rate": 1.572781255015246e-05, "loss": 0.0653, "theoretical_loss": 3.329135701626703, "tokens_seen": 3248488448 }, { "epoch": 0.98, "learning_rate": 1.5647568608570054e-05, "loss": 0.0654, "theoretical_loss": 3.329115552875738, "tokens_seen": 3248750592 }, { "epoch": 0.98, "learning_rate": 1.5567324666987642e-05, "loss": 0.0667, "theoretical_loss": 3.3290954062057114, "tokens_seen": 3249012736 }, { "epoch": 0.98, "learning_rate": 1.548708072540523e-05, "loss": 0.0651, "theoretical_loss": 3.3290752616162402, "tokens_seen": 3249274880 }, { "epoch": 0.98, "learning_rate": 1.540683678382282e-05, "loss": 0.0652, "theoretical_loss": 3.3290551191069424, "tokens_seen": 3249537024 }, { "epoch": 0.98, "learning_rate": 1.5326592842240413e-05, "loss": 0.0648, "theoretical_loss": 3.3290349786774347, "tokens_seen": 3249799168 }, { "epoch": 0.98, "learning_rate": 1.5246348900658001e-05, "loss": 0.065, "theoretical_loss": 3.3290148403273347, "tokens_seen": 3250061312 }, { "epoch": 0.98, "learning_rate": 1.516610495907559e-05, "loss": 0.0609, "theoretical_loss": 3.3289947040562606, "tokens_seen": 3250323456 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.000409378350013867, "objective/train/docs_used": 1181030, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2445560693740845, "objective/train/original_loss": 1.244555950164795, "objective/train/theoretical_loss": 3.3289745698638296, "objective/train/tokens_used": 3271045600, "objective/train/value_avg": -0.0084991455078125, "objective/train/value_loss": 0.00021049527276773006, "objective/train/value_max": -2.485513687133789e-05, "objective/train/value_min": -0.2293701171875, "objective/train/value_reward_corr": 0.7449387464870404, "objective/train/value_std": 0.016754150390625, "objective/train/weight_avg": 1.0005031824111938, "objective/train/weighted_lm_loss": 1.2445323467254639, "objective/train/weights_max": 1.112181305885315, "objective/train/weights_min": 0.36861130595207214, "theoretical_loss": 3.3289745698638296, "tokens_seen": 3250585600 }, { "epoch": 0.99, "learning_rate": 1.508586101749318e-05, "loss": 0.064, "theoretical_loss": 3.3289745698638296, "tokens_seen": 3250585600 }, { "epoch": 0.99, "learning_rate": 1.500561707591077e-05, "loss": 0.0636, "theoretical_loss": 3.328954437749659, "tokens_seen": 3250847744 }, { "epoch": 0.99, "learning_rate": 1.4925373134328359e-05, "loss": 0.0631, "theoretical_loss": 3.328934307713368, "tokens_seen": 3251109888 }, { "epoch": 0.99, "learning_rate": 1.4845129192745948e-05, "loss": 0.0642, "theoretical_loss": 3.328914179754573, "tokens_seen": 3251372032 }, { "epoch": 0.99, "learning_rate": 1.4764885251163536e-05, "loss": 0.0634, "theoretical_loss": 3.328894053872894, "tokens_seen": 3251634176 }, { "epoch": 0.99, "learning_rate": 1.4684641309581127e-05, "loss": 0.0623, "theoretical_loss": 3.3288739300679477, "tokens_seen": 3251896320 }, { "epoch": 0.99, "learning_rate": 1.4604397367998716e-05, "loss": 0.0629, "theoretical_loss": 3.3288538083393533, "tokens_seen": 3252158464 }, { "epoch": 0.99, "learning_rate": 1.4524153426416305e-05, "loss": 0.0661, "theoretical_loss": 3.328833688686729, "tokens_seen": 3252420608 }, { "epoch": 0.99, "learning_rate": 1.4443909484833895e-05, "loss": 0.0646, "theoretical_loss": 3.328813571109693, "tokens_seen": 3252682752 }, { "epoch": 0.99, "learning_rate": 1.4363665543251484e-05, "loss": 0.0621, "theoretical_loss": 3.3287934556078644, "tokens_seen": 3252944896 }, { "epoch": 0.99, "learning_rate": 1.4283421601669073e-05, "loss": 0.062, "theoretical_loss": 3.3287733421808614, "tokens_seen": 3253207040 }, { "epoch": 0.99, "learning_rate": 1.4203177660086664e-05, "loss": 0.0658, "theoretical_loss": 3.328753230828303, "tokens_seen": 3253469184 }, { "epoch": 0.99, "learning_rate": 1.4122933718504254e-05, "loss": 0.0642, "theoretical_loss": 3.3287331215498086, "tokens_seen": 3253731328 }, { "epoch": 0.99, "learning_rate": 1.4042689776921842e-05, "loss": 0.063, "theoretical_loss": 3.328713014344997, "tokens_seen": 3253993472 }, { "epoch": 0.99, "learning_rate": 1.3962445835339432e-05, "loss": 0.0658, "theoretical_loss": 3.328692909213487, "tokens_seen": 3254255616 }, { "epoch": 0.99, "learning_rate": 1.3882201893757023e-05, "loss": 0.0607, "theoretical_loss": 3.3286728061548976, "tokens_seen": 3254517760 }, { "epoch": 0.99, "learning_rate": 1.380195795217461e-05, "loss": 0.0624, "theoretical_loss": 3.328652705168849, "tokens_seen": 3254779904 }, { "epoch": 0.99, "learning_rate": 1.37217140105922e-05, "loss": 0.0632, "theoretical_loss": 3.3286326062549603, "tokens_seen": 3255042048 }, { "epoch": 0.99, "learning_rate": 1.3641470069009791e-05, "loss": 0.0657, "theoretical_loss": 3.3286125094128507, "tokens_seen": 3255304192 }, { "epoch": 0.99, "learning_rate": 1.3561226127427378e-05, "loss": 0.0604, "theoretical_loss": 3.32859241464214, "tokens_seen": 3255566336 }, { "epoch": 0.99, "learning_rate": 1.3480982185844969e-05, "loss": 0.0635, "theoretical_loss": 3.328572321942448, "tokens_seen": 3255828480 }, { "epoch": 0.99, "learning_rate": 1.340073824426256e-05, "loss": 0.0633, "theoretical_loss": 3.328552231313395, "tokens_seen": 3256090624 }, { "epoch": 0.99, "learning_rate": 1.3320494302680147e-05, "loss": 0.0634, "theoretical_loss": 3.3285321427545997, "tokens_seen": 3256352768 }, { "epoch": 0.99, "learning_rate": 1.3240250361097737e-05, "loss": 0.0663, "theoretical_loss": 3.3285120562656836, "tokens_seen": 3256614912 }, { "epoch": 0.99, "learning_rate": 1.3160006419515328e-05, "loss": 0.0654, "theoretical_loss": 3.3284919718462653, "tokens_seen": 3256877056 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.00017991720233112574, "objective/train/docs_used": 1183352, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2570805549621582, "objective/train/original_loss": 1.2570806741714478, "objective/train/theoretical_loss": 3.3284718894959666, "objective/train/tokens_used": 3277599200, "objective/train/value_avg": -0.00946044921875, "objective/train/value_loss": 0.00020266363571863621, "objective/train/value_max": -3.0934810638427734e-05, "objective/train/value_min": -0.92529296875, "objective/train/value_reward_corr": 0.7796501256549795, "objective/train/value_std": 0.018463134765625, "objective/train/weight_avg": 1.0002756118774414, "objective/train/weighted_lm_loss": 1.2571203708648682, "objective/train/weights_max": 1.4121947288513184, "objective/train/weights_min": 0.3786547780036926, "theoretical_loss": 3.3284718894959666, "tokens_seen": 3257139200 }, { "epoch": 0.99, "learning_rate": 1.3079762477932915e-05, "loss": 0.0655, "theoretical_loss": 3.3284718894959666, "tokens_seen": 3257139200 }, { "epoch": 0.99, "learning_rate": 1.2999518536350506e-05, "loss": 0.0616, "theoretical_loss": 3.328451809214407, "tokens_seen": 3257401344 }, { "epoch": 0.99, "learning_rate": 1.2919274594768096e-05, "loss": 0.0633, "theoretical_loss": 3.328431731001207, "tokens_seen": 3257663488 }, { "epoch": 0.99, "learning_rate": 1.2839030653185684e-05, "loss": 0.0645, "theoretical_loss": 3.328411654855987, "tokens_seen": 3257925632 }, { "epoch": 0.99, "learning_rate": 1.2758786711603274e-05, "loss": 0.0598, "theoretical_loss": 3.3283915807783675, "tokens_seen": 3258187776 }, { "epoch": 0.99, "learning_rate": 1.2678542770020865e-05, "loss": 0.062, "theoretical_loss": 3.3283715087679697, "tokens_seen": 3258449920 }, { "epoch": 0.99, "learning_rate": 1.2598298828438452e-05, "loss": 0.0616, "theoretical_loss": 3.3283514388244146, "tokens_seen": 3258712064 }, { "epoch": 0.99, "learning_rate": 1.2518054886856043e-05, "loss": 0.063, "theoretical_loss": 3.3283313709473226, "tokens_seen": 3258974208 }, { "epoch": 0.99, "learning_rate": 1.2437810945273633e-05, "loss": 0.0657, "theoretical_loss": 3.3283113051363147, "tokens_seen": 3259236352 }, { "epoch": 0.99, "learning_rate": 1.235756700369122e-05, "loss": 0.064, "theoretical_loss": 3.3282912413910126, "tokens_seen": 3259498496 }, { "epoch": 0.99, "learning_rate": 1.2277323062108811e-05, "loss": 0.0621, "theoretical_loss": 3.328271179711037, "tokens_seen": 3259760640 }, { "epoch": 0.99, "learning_rate": 1.2197079120526401e-05, "loss": 0.0631, "theoretical_loss": 3.3282511200960094, "tokens_seen": 3260022784 }, { "epoch": 0.99, "learning_rate": 1.2116835178943989e-05, "loss": 0.0622, "theoretical_loss": 3.3282310625455516, "tokens_seen": 3260284928 }, { "epoch": 0.99, "learning_rate": 1.203659123736158e-05, "loss": 0.062, "theoretical_loss": 3.3282110070592847, "tokens_seen": 3260547072 }, { "epoch": 0.99, "learning_rate": 1.195634729577917e-05, "loss": 0.0631, "theoretical_loss": 3.32819095363683, "tokens_seen": 3260809216 }, { "epoch": 0.99, "learning_rate": 1.1876103354196759e-05, "loss": 0.0623, "theoretical_loss": 3.3281709022778103, "tokens_seen": 3261071360 }, { "epoch": 0.99, "learning_rate": 1.1795859412614348e-05, "loss": 0.0615, "theoretical_loss": 3.3281508529818464, "tokens_seen": 3261333504 }, { "epoch": 0.99, "learning_rate": 1.1715615471031938e-05, "loss": 0.0622, "theoretical_loss": 3.3281308057485606, "tokens_seen": 3261595648 }, { "epoch": 0.99, "learning_rate": 1.1635371529449527e-05, "loss": 0.0628, "theoretical_loss": 3.3281107605775753, "tokens_seen": 3261857792 }, { "epoch": 0.99, "learning_rate": 1.1555127587867116e-05, "loss": 0.0626, "theoretical_loss": 3.328090717468512, "tokens_seen": 3262119936 }, { "epoch": 0.99, "learning_rate": 1.1474883646284707e-05, "loss": 0.0621, "theoretical_loss": 3.3280706764209933, "tokens_seen": 3262382080 }, { "epoch": 0.99, "learning_rate": 1.1394639704702296e-05, "loss": 0.0624, "theoretical_loss": 3.3280506374346417, "tokens_seen": 3262644224 }, { "epoch": 0.99, "learning_rate": 1.1314395763119884e-05, "loss": 0.0604, "theoretical_loss": 3.3280306005090794, "tokens_seen": 3262906368 }, { "epoch": 0.99, "learning_rate": 1.1234151821537475e-05, "loss": 0.0649, "theoretical_loss": 3.3280105656439285, "tokens_seen": 3263168512 }, { "epoch": 0.99, "learning_rate": 1.1153907879955064e-05, "loss": 0.0617, "theoretical_loss": 3.3279905328388124, "tokens_seen": 3263430656 }, { "epoch": 0.99, "objective/train/advantage_avg": -9.282096289098263e-05, "objective/train/docs_used": 1185836, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4554698467254639, "objective/train/original_loss": 1.4554697275161743, "objective/train/theoretical_loss": 3.327970502093353, "objective/train/tokens_used": 3284152800, "objective/train/value_avg": -0.0071563720703125, "objective/train/value_loss": 0.00044870967394672334, "objective/train/value_max": -1.722574234008789e-05, "objective/train/value_min": -0.908203125, "objective/train/value_reward_corr": 0.7138698933127893, "objective/train/value_std": 0.021087646484375, "objective/train/weight_avg": 1.000106930732727, "objective/train/weighted_lm_loss": 1.4546253681182861, "objective/train/weights_max": 2.210451126098633, "objective/train/weights_min": 0.3683115839958191, "theoretical_loss": 3.327970502093353, "tokens_seen": 3263692800 }, { "epoch": 0.99, "learning_rate": 1.1073663938372653e-05, "loss": 0.0655, "theoretical_loss": 3.327970502093353, "tokens_seen": 3263692800 }, { "epoch": 0.99, "learning_rate": 1.0993419996790243e-05, "loss": 0.0625, "theoretical_loss": 3.3279504734071743, "tokens_seen": 3263954944 }, { "epoch": 0.99, "learning_rate": 1.0913176055207832e-05, "loss": 0.0632, "theoretical_loss": 3.3279304467798987, "tokens_seen": 3264217088 }, { "epoch": 0.99, "learning_rate": 1.0832932113625421e-05, "loss": 0.062, "theoretical_loss": 3.3279104222111484, "tokens_seen": 3264479232 }, { "epoch": 0.99, "learning_rate": 1.0752688172043012e-05, "loss": 0.0647, "theoretical_loss": 3.3278903997005473, "tokens_seen": 3264741376 }, { "epoch": 0.99, "learning_rate": 1.06724442304606e-05, "loss": 0.0633, "theoretical_loss": 3.327870379247719, "tokens_seen": 3265003520 }, { "epoch": 0.99, "learning_rate": 1.059220028887819e-05, "loss": 0.0644, "theoretical_loss": 3.327850360852286, "tokens_seen": 3265265664 }, { "epoch": 0.99, "learning_rate": 1.0511956347295778e-05, "loss": 0.0637, "theoretical_loss": 3.327830344513872, "tokens_seen": 3265527808 }, { "epoch": 0.99, "learning_rate": 1.0431712405713369e-05, "loss": 0.0621, "theoretical_loss": 3.3278103302321007, "tokens_seen": 3265789952 }, { "epoch": 0.99, "learning_rate": 1.0351468464130958e-05, "loss": 0.0618, "theoretical_loss": 3.327790318006596, "tokens_seen": 3266052096 }, { "epoch": 0.99, "learning_rate": 1.0271224522548547e-05, "loss": 0.0623, "theoretical_loss": 3.327770307836981, "tokens_seen": 3266314240 }, { "epoch": 0.99, "learning_rate": 1.0190980580966137e-05, "loss": 0.063, "theoretical_loss": 3.3277502997228794, "tokens_seen": 3266576384 }, { "epoch": 0.99, "learning_rate": 1.0110736639383726e-05, "loss": 0.0639, "theoretical_loss": 3.327730293663916, "tokens_seen": 3266838528 }, { "epoch": 0.99, "learning_rate": 1.0030492697801315e-05, "loss": 0.0667, "theoretical_loss": 3.327710289659714, "tokens_seen": 3267100672 }, { "epoch": 0.99, "learning_rate": 9.950248756218906e-06, "loss": 0.0659, "theoretical_loss": 3.327690287709898, "tokens_seen": 3267362816 }, { "epoch": 0.99, "learning_rate": 9.870004814636496e-06, "loss": 0.0623, "theoretical_loss": 3.327670287814092, "tokens_seen": 3267624960 }, { "epoch": 0.99, "learning_rate": 9.789760873054084e-06, "loss": 0.0626, "theoretical_loss": 3.3276502899719205, "tokens_seen": 3267887104 }, { "epoch": 0.99, "learning_rate": 9.709516931471674e-06, "loss": 0.0639, "theoretical_loss": 3.3276302941830074, "tokens_seen": 3268149248 }, { "epoch": 0.99, "learning_rate": 9.629272989889265e-06, "loss": 0.0617, "theoretical_loss": 3.327610300446978, "tokens_seen": 3268411392 }, { "epoch": 0.99, "learning_rate": 9.549029048306852e-06, "loss": 0.0642, "theoretical_loss": 3.3275903087634564, "tokens_seen": 3268673536 }, { "epoch": 0.99, "learning_rate": 9.468785106724443e-06, "loss": 0.0621, "theoretical_loss": 3.327570319132067, "tokens_seen": 3268935680 }, { "epoch": 0.99, "learning_rate": 9.388541165142033e-06, "loss": 0.0662, "theoretical_loss": 3.3275503315524357, "tokens_seen": 3269197824 }, { "epoch": 0.99, "learning_rate": 9.30829722355962e-06, "loss": 0.0616, "theoretical_loss": 3.3275303460241865, "tokens_seen": 3269459968 }, { "epoch": 0.99, "learning_rate": 9.228053281977211e-06, "loss": 0.0636, "theoretical_loss": 3.327510362546944, "tokens_seen": 3269722112 }, { "epoch": 0.99, "learning_rate": 9.147809340394802e-06, "loss": 0.0636, "theoretical_loss": 3.3274903811203345, "tokens_seen": 3269984256 }, { "epoch": 0.99, "objective/train/advantage_avg": 5.63900321139954e-05, "objective/train/docs_used": 1188255, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3194350004196167, "objective/train/original_loss": 1.3194348812103271, "objective/train/theoretical_loss": 3.327470401743983, "objective/train/tokens_used": 3290706400, "objective/train/value_avg": -0.007358551025390625, "objective/train/value_loss": 0.00023028827854432166, "objective/train/value_max": -3.427267074584961e-05, "objective/train/value_min": -0.39013671875, "objective/train/value_reward_corr": 0.6854116826893848, "objective/train/value_std": 0.01427459716796875, "objective/train/weight_avg": 1.0001581907272339, "objective/train/weighted_lm_loss": 1.3194934129714966, "objective/train/weights_max": 1.4432260990142822, "objective/train/weights_min": 0.3683066666126251, "theoretical_loss": 3.327470401743983, "tokens_seen": 3270246400 }, { "epoch": 0.99, "learning_rate": 9.067565398812389e-06, "loss": 0.0637, "theoretical_loss": 3.327470401743983, "tokens_seen": 3270246400 }, { "epoch": 0.99, "learning_rate": 8.98732145722998e-06, "loss": 0.0615, "theoretical_loss": 3.327450424417514, "tokens_seen": 3270508544 }, { "epoch": 0.99, "learning_rate": 8.90707751564757e-06, "loss": 0.0646, "theoretical_loss": 3.3274304491405533, "tokens_seen": 3270770688 }, { "epoch": 0.99, "learning_rate": 8.826833574065157e-06, "loss": 0.0628, "theoretical_loss": 3.3274104759127265, "tokens_seen": 3271032832 }, { "epoch": 0.99, "learning_rate": 8.746589632482748e-06, "loss": 0.0633, "theoretical_loss": 3.3273905047336596, "tokens_seen": 3271294976 }, { "epoch": 0.99, "learning_rate": 8.666345690900338e-06, "loss": 0.0632, "theoretical_loss": 3.3273705356029772, "tokens_seen": 3271557120 }, { "epoch": 0.99, "learning_rate": 8.586101749317925e-06, "loss": 0.0642, "theoretical_loss": 3.327350568520306, "tokens_seen": 3271819264 }, { "epoch": 0.99, "learning_rate": 8.505857807735516e-06, "loss": 0.0625, "theoretical_loss": 3.327330603485272, "tokens_seen": 3272081408 }, { "epoch": 0.99, "learning_rate": 8.425613866153107e-06, "loss": 0.0629, "theoretical_loss": 3.3273106404975, "tokens_seen": 3272343552 }, { "epoch": 0.99, "learning_rate": 8.345369924570694e-06, "loss": 0.0631, "theoretical_loss": 3.3272906795566177, "tokens_seen": 3272605696 }, { "epoch": 0.99, "learning_rate": 8.265125982988284e-06, "loss": 0.063, "theoretical_loss": 3.3272707206622503, "tokens_seen": 3272867840 }, { "epoch": 0.99, "learning_rate": 8.184882041405875e-06, "loss": 0.0629, "theoretical_loss": 3.327250763814024, "tokens_seen": 3273129984 }, { "epoch": 0.99, "learning_rate": 8.104638099823462e-06, "loss": 0.0642, "theoretical_loss": 3.327230809011566, "tokens_seen": 3273392128 }, { "epoch": 0.99, "learning_rate": 8.024394158241053e-06, "loss": 0.0635, "theoretical_loss": 3.327210856254502, "tokens_seen": 3273654272 }, { "epoch": 0.99, "learning_rate": 7.944150216658643e-06, "loss": 0.0644, "theoretical_loss": 3.3271909055424587, "tokens_seen": 3273916416 }, { "epoch": 0.99, "learning_rate": 7.86390627507623e-06, "loss": 0.0621, "theoretical_loss": 3.327170956875063, "tokens_seen": 3274178560 }, { "epoch": 0.99, "learning_rate": 7.783662333493821e-06, "loss": 0.0632, "theoretical_loss": 3.3271510102519413, "tokens_seen": 3274440704 }, { "epoch": 0.99, "learning_rate": 7.70341839191141e-06, "loss": 0.0621, "theoretical_loss": 3.327131065672721, "tokens_seen": 3274702848 }, { "epoch": 0.99, "learning_rate": 7.623174450329001e-06, "loss": 0.0638, "theoretical_loss": 3.327111123137029, "tokens_seen": 3274964992 }, { "epoch": 0.99, "learning_rate": 7.54293050874659e-06, "loss": 0.0612, "theoretical_loss": 3.327091182644492, "tokens_seen": 3275227136 }, { "epoch": 0.99, "learning_rate": 7.462686567164179e-06, "loss": 0.0609, "theoretical_loss": 3.3270712441947374, "tokens_seen": 3275489280 }, { "epoch": 0.99, "learning_rate": 7.382442625581768e-06, "loss": 0.0627, "theoretical_loss": 3.327051307787393, "tokens_seen": 3275751424 }, { "epoch": 0.99, "learning_rate": 7.302198683999358e-06, "loss": 0.0613, "theoretical_loss": 3.327031373422085, "tokens_seen": 3276013568 }, { "epoch": 0.99, "learning_rate": 7.221954742416948e-06, "loss": 0.0643, "theoretical_loss": 3.3270114410984415, "tokens_seen": 3276275712 }, { "epoch": 0.99, "learning_rate": 7.141710800834537e-06, "loss": 0.0645, "theoretical_loss": 3.3269915108160903, "tokens_seen": 3276537856 }, { "debugging/Compilability": 1.0, "debugging/distinct-1-grams": 0.7547341464283754, "debugging/entropy-1-grams": 5.388662098705375, "debugging/length": 454.42105263157896, "debugging/num_segments": 19, "debugging/raw_token_scores_avg": 0.006775569636374712, "debugging/raw_token_scores_std": 0.018655896186828613, "debugging/score": 0.008703016206509368, "debugging/score_std": 0.010562036912131832, "epoch": 0.99, "objective/train/advantage_avg": 0.0003331316402181983, "objective/train/docs_used": 1190634, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3039427995681763, "objective/train/original_loss": 1.3039426803588867, "objective/train/theoretical_loss": 3.3269715825746586, "objective/train/tokens_used": 3297260000, "objective/train/value_avg": -0.007110595703125, "objective/train/value_loss": 0.00014388281852006912, "objective/train/value_max": -2.467632293701172e-05, "objective/train/value_min": -0.85791015625, "objective/train/value_reward_corr": 0.7662398062523196, "objective/train/value_std": 0.01428985595703125, "objective/train/weight_avg": 1.000400424003601, "objective/train/weighted_lm_loss": 1.3044633865356445, "objective/train/weights_max": 1.2395168542861938, "objective/train/weights_min": 0.37148112058639526, "theoretical_loss": 3.3269715825746586, "tokens_seen": 3276800000 }, { "epoch": 0.99, "learning_rate": 7.061466859252127e-06, "loss": 0.0623, "theoretical_loss": 3.3269715825746586, "tokens_seen": 3276800000 }, { "epoch": 0.99, "learning_rate": 6.981222917669716e-06, "loss": 0.0634, "theoretical_loss": 3.3269516563737747, "tokens_seen": 3277062144 }, { "epoch": 0.99, "learning_rate": 6.900978976087305e-06, "loss": 0.0667, "theoretical_loss": 3.326931732213066, "tokens_seen": 3277324288 }, { "epoch": 0.99, "learning_rate": 6.820735034504896e-06, "loss": 0.063, "theoretical_loss": 3.32691181009216, "tokens_seen": 3277586432 }, { "epoch": 0.99, "learning_rate": 6.7404910929224845e-06, "loss": 0.0625, "theoretical_loss": 3.326891890010686, "tokens_seen": 3277848576 }, { "epoch": 0.99, "learning_rate": 6.660247151340073e-06, "loss": 0.0624, "theoretical_loss": 3.326871971968271, "tokens_seen": 3278110720 }, { "epoch": 0.99, "learning_rate": 6.580003209757664e-06, "loss": 0.0645, "theoretical_loss": 3.326852055964544, "tokens_seen": 3278372864 }, { "epoch": 0.99, "learning_rate": 6.499759268175253e-06, "loss": 0.065, "theoretical_loss": 3.3268321419991325, "tokens_seen": 3278635008 }, { "epoch": 0.99, "learning_rate": 6.419515326592842e-06, "loss": 0.0617, "theoretical_loss": 3.326812230071666, "tokens_seen": 3278897152 }, { "epoch": 0.99, "learning_rate": 6.339271385010432e-06, "loss": 0.0626, "theoretical_loss": 3.326792320181772, "tokens_seen": 3279159296 }, { "epoch": 0.99, "learning_rate": 6.259027443428021e-06, "loss": 0.061, "theoretical_loss": 3.3267724123290803, "tokens_seen": 3279421440 }, { "epoch": 0.99, "learning_rate": 6.17878350184561e-06, "loss": 0.0644, "theoretical_loss": 3.3267525065132184, "tokens_seen": 3279683584 }, { "epoch": 0.99, "learning_rate": 6.098539560263201e-06, "loss": 0.0637, "theoretical_loss": 3.326732602733816, "tokens_seen": 3279945728 }, { "epoch": 0.99, "learning_rate": 6.01829561868079e-06, "loss": 0.0657, "theoretical_loss": 3.3267127009905018, "tokens_seen": 3280207872 }, { "epoch": 0.99, "learning_rate": 5.938051677098379e-06, "loss": 0.0649, "theoretical_loss": 3.3266928012829045, "tokens_seen": 3280470016 }, { "epoch": 0.99, "learning_rate": 5.857807735515969e-06, "loss": 0.0649, "theoretical_loss": 3.3266729036106533, "tokens_seen": 3280732160 }, { "epoch": 0.99, "learning_rate": 5.777563793933558e-06, "loss": 0.0607, "theoretical_loss": 3.326653007973378, "tokens_seen": 3280994304 }, { "epoch": 0.99, "learning_rate": 5.697319852351148e-06, "loss": 0.0608, "theoretical_loss": 3.3266331143707073, "tokens_seen": 3281256448 }, { "epoch": 0.99, "learning_rate": 5.6170759107687375e-06, "loss": 0.0632, "theoretical_loss": 3.326613222802271, "tokens_seen": 3281518592 }, { "epoch": 0.99, "learning_rate": 5.536831969186326e-06, "loss": 0.0649, "theoretical_loss": 3.326593333267698, "tokens_seen": 3281780736 }, { "epoch": 0.99, "learning_rate": 5.456588027603916e-06, "loss": 0.0636, "theoretical_loss": 3.3265734457666185, "tokens_seen": 3282042880 }, { "epoch": 0.99, "learning_rate": 5.376344086021506e-06, "loss": 0.063, "theoretical_loss": 3.3265535602986622, "tokens_seen": 3282305024 }, { "epoch": 0.99, "learning_rate": 5.296100144439095e-06, "loss": 0.0608, "theoretical_loss": 3.3265336768634586, "tokens_seen": 3282567168 }, { "epoch": 0.99, "learning_rate": 5.2158562028566845e-06, "loss": 0.0636, "theoretical_loss": 3.3265137954606376, "tokens_seen": 3282829312 }, { "epoch": 0.99, "learning_rate": 5.135612261274273e-06, "loss": 0.0624, "theoretical_loss": 3.3264939160898295, "tokens_seen": 3283091456 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.0002915835939347744, "objective/train/docs_used": 1192893, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.233608603477478, "objective/train/original_loss": 1.2336084842681885, "objective/train/theoretical_loss": 3.326474038750664, "objective/train/tokens_used": 3303813600, "objective/train/value_avg": -0.00603485107421875, "objective/train/value_loss": 0.0001250050845555961, "objective/train/value_max": -3.272294998168945e-05, "objective/train/value_min": -0.449951171875, "objective/train/value_reward_corr": 0.695894549483661, "objective/train/value_std": 0.010955810546875, "objective/train/weight_avg": 1.0003503561019897, "objective/train/weighted_lm_loss": 1.23422110080719, "objective/train/weights_max": 1.2648903131484985, "objective/train/weights_min": 0.5307503342628479, "theoretical_loss": 3.326474038750664, "tokens_seen": 3283353600 }, { "epoch": 0.99, "learning_rate": 5.055368319691863e-06, "loss": 0.0616, "theoretical_loss": 3.326474038750664, "tokens_seen": 3283353600 }, { "epoch": 1.0, "learning_rate": 4.975124378109453e-06, "loss": 0.0634, "theoretical_loss": 3.3264541634427713, "tokens_seen": 3283615744 }, { "epoch": 1.0, "learning_rate": 4.894880436527042e-06, "loss": 0.0633, "theoretical_loss": 3.3264342901657824, "tokens_seen": 3283877888 }, { "epoch": 1.0, "learning_rate": 4.814636494944632e-06, "loss": 0.0632, "theoretical_loss": 3.326414418919327, "tokens_seen": 3284140032 }, { "epoch": 1.0, "learning_rate": 4.734392553362221e-06, "loss": 0.0613, "theoretical_loss": 3.3263945497030356, "tokens_seen": 3284402176 }, { "epoch": 1.0, "learning_rate": 4.65414861177981e-06, "loss": 0.0648, "theoretical_loss": 3.326374682516539, "tokens_seen": 3284664320 }, { "epoch": 1.0, "learning_rate": 4.573904670197401e-06, "loss": 0.0636, "theoretical_loss": 3.3263548173594675, "tokens_seen": 3284926464 }, { "epoch": 1.0, "learning_rate": 4.49366072861499e-06, "loss": 0.0645, "theoretical_loss": 3.3263349542314526, "tokens_seen": 3285188608 }, { "epoch": 1.0, "learning_rate": 4.4134167870325786e-06, "loss": 0.0639, "theoretical_loss": 3.3263150931321244, "tokens_seen": 3285450752 }, { "epoch": 1.0, "learning_rate": 4.333172845450169e-06, "loss": 0.0615, "theoretical_loss": 3.326295234061114, "tokens_seen": 3285712896 }, { "epoch": 1.0, "learning_rate": 4.252928903867758e-06, "loss": 0.0624, "theoretical_loss": 3.326275377018053, "tokens_seen": 3285975040 }, { "epoch": 1.0, "learning_rate": 4.172684962285347e-06, "loss": 0.0625, "theoretical_loss": 3.326255522002572, "tokens_seen": 3286237184 }, { "epoch": 1.0, "learning_rate": 4.0924410207029375e-06, "loss": 0.0635, "theoretical_loss": 3.3262356690143022, "tokens_seen": 3286499328 }, { "epoch": 1.0, "learning_rate": 4.012197079120526e-06, "loss": 0.0633, "theoretical_loss": 3.3262158180528756, "tokens_seen": 3286761472 }, { "epoch": 1.0, "learning_rate": 3.931953137538115e-06, "loss": 0.0604, "theoretical_loss": 3.326195969117923, "tokens_seen": 3287023616 }, { "epoch": 1.0, "learning_rate": 3.851709195955705e-06, "loss": 0.0644, "theoretical_loss": 3.326176122209076, "tokens_seen": 3287285760 }, { "epoch": 1.0, "learning_rate": 3.771465254373295e-06, "loss": 0.0656, "theoretical_loss": 3.3261562773259667, "tokens_seen": 3287547904 }, { "epoch": 1.0, "learning_rate": 3.691221312790884e-06, "loss": 0.0643, "theoretical_loss": 3.3261364344682267, "tokens_seen": 3287810048 }, { "epoch": 1.0, "learning_rate": 3.610977371208474e-06, "loss": 0.0638, "theoretical_loss": 3.3261165936354873, "tokens_seen": 3288072192 }, { "epoch": 1.0, "learning_rate": 3.5307334296260636e-06, "loss": 0.0627, "theoretical_loss": 3.3260967548273808, "tokens_seen": 3288334336 }, { "epoch": 1.0, "learning_rate": 3.4504894880436525e-06, "loss": 0.0626, "theoretical_loss": 3.326076918043539, "tokens_seen": 3288596480 }, { "epoch": 1.0, "learning_rate": 3.3702455464612422e-06, "loss": 0.0631, "theoretical_loss": 3.3260570832835943, "tokens_seen": 3288858624 }, { "epoch": 1.0, "learning_rate": 3.290001604878832e-06, "loss": 0.0633, "theoretical_loss": 3.326037250547179, "tokens_seen": 3289120768 }, { "epoch": 1.0, "learning_rate": 3.209757663296421e-06, "loss": 0.0602, "theoretical_loss": 3.326017419833925, "tokens_seen": 3289382912 }, { "epoch": 1.0, "learning_rate": 3.1295137217140106e-06, "loss": 0.0603, "theoretical_loss": 3.3259975911434654, "tokens_seen": 3289645056 }, { "epoch": 1.0, "objective/train/advantage_avg": 0.0004319137951824814, "objective/train/docs_used": 1195204, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2409605979919434, "objective/train/original_loss": 1.2409604787826538, "objective/train/theoretical_loss": 3.3259777644754323, "objective/train/tokens_used": 3310367200, "objective/train/value_avg": -0.005619049072265625, "objective/train/value_loss": 0.00011758451000787318, "objective/train/value_max": -2.1457672119140625e-05, "objective/train/value_min": -0.44287109375, "objective/train/value_reward_corr": 0.7364096476912639, "objective/train/value_std": 0.0122528076171875, "objective/train/weight_avg": 1.0004870891571045, "objective/train/weighted_lm_loss": 1.2417213916778564, "objective/train/weights_max": 1.5326497554779053, "objective/train/weights_min": 0.37251704931259155, "theoretical_loss": 3.3259777644754323, "tokens_seen": 3289907200 }, { "epoch": 1.0, "learning_rate": 3.0492697801316004e-06, "loss": 0.063, "theoretical_loss": 3.3259777644754323, "tokens_seen": 3289907200 }, { "epoch": 1.0, "learning_rate": 2.9690258385491897e-06, "loss": 0.0647, "theoretical_loss": 3.3259579398294576, "tokens_seen": 3290169344 }, { "epoch": 1.0, "learning_rate": 2.888781896966779e-06, "loss": 0.0621, "theoretical_loss": 3.325938117205175, "tokens_seen": 3290431488 }, { "epoch": 1.0, "learning_rate": 2.8085379553843687e-06, "loss": 0.0612, "theoretical_loss": 3.3259182966022167, "tokens_seen": 3290693632 }, { "epoch": 1.0, "learning_rate": 2.728294013801958e-06, "loss": 0.0617, "theoretical_loss": 3.3258984780202163, "tokens_seen": 3290955776 }, { "epoch": 1.0, "learning_rate": 2.6480500722195474e-06, "loss": 0.0622, "theoretical_loss": 3.325878661458806, "tokens_seen": 3291217920 }, { "epoch": 1.0, "learning_rate": 2.5678061306371367e-06, "loss": 0.0639, "theoretical_loss": 3.325858846917619, "tokens_seen": 3291480064 }, { "epoch": 1.0, "learning_rate": 2.4875621890547264e-06, "loss": 0.0626, "theoretical_loss": 3.3258390343962887, "tokens_seen": 3291742208 }, { "epoch": 1.0, "learning_rate": 2.407318247472316e-06, "loss": 0.0638, "theoretical_loss": 3.3258192238944484, "tokens_seen": 3292004352 }, { "epoch": 1.0, "learning_rate": 2.327074305889905e-06, "loss": 0.0606, "theoretical_loss": 3.3257994154117316, "tokens_seen": 3292266496 }, { "epoch": 1.0, "learning_rate": 2.246830364307495e-06, "loss": 0.0625, "theoretical_loss": 3.325779608947771, "tokens_seen": 3292528640 }, { "epoch": 1.0, "learning_rate": 2.1665864227250846e-06, "loss": 0.0625, "theoretical_loss": 3.3257598045022014, "tokens_seen": 3292790784 }, { "epoch": 1.0, "learning_rate": 2.0863424811426735e-06, "loss": 0.0638, "theoretical_loss": 3.3257400020746553, "tokens_seen": 3293052928 }, { "epoch": 1.0, "learning_rate": 2.006098539560263e-06, "loss": 0.0632, "theoretical_loss": 3.325720201664767, "tokens_seen": 3293315072 }, { "epoch": 1.0, "learning_rate": 1.9258545979778525e-06, "loss": 0.0623, "theoretical_loss": 3.32570040327217, "tokens_seen": 3293577216 }, { "epoch": 1.0, "learning_rate": 1.845610656395442e-06, "loss": 0.0625, "theoretical_loss": 3.325680606896499, "tokens_seen": 3293839360 }, { "epoch": 1.0, "learning_rate": 1.7653667148130318e-06, "loss": 0.0639, "theoretical_loss": 3.325660812537387, "tokens_seen": 3294101504 }, { "epoch": 1.0, "learning_rate": 1.6851227732306211e-06, "loss": 0.0648, "theoretical_loss": 3.3256410201944693, "tokens_seen": 3294363648 }, { "epoch": 1.0, "learning_rate": 1.6048788316482104e-06, "loss": 0.063, "theoretical_loss": 3.325621229867379, "tokens_seen": 3294625792 }, { "epoch": 1.0, "learning_rate": 1.5246348900658002e-06, "loss": 0.0644, "theoretical_loss": 3.3256014415557504, "tokens_seen": 3294887936 }, { "epoch": 1.0, "learning_rate": 1.4443909484833895e-06, "loss": 0.0631, "theoretical_loss": 3.325581655259219, "tokens_seen": 3295150080 }, { "epoch": 1.0, "learning_rate": 1.364147006900979e-06, "loss": 0.0624, "theoretical_loss": 3.3255618709774186, "tokens_seen": 3295412224 }, { "epoch": 1.0, "learning_rate": 1.2839030653185684e-06, "loss": 0.0656, "theoretical_loss": 3.3255420887099842, "tokens_seen": 3295674368 }, { "epoch": 1.0, "learning_rate": 1.203659123736158e-06, "loss": 0.0612, "theoretical_loss": 3.32552230845655, "tokens_seen": 3295936512 }, { "epoch": 1.0, "learning_rate": 1.1234151821537474e-06, "loss": 0.0634, "theoretical_loss": 3.3255025302167507, "tokens_seen": 3296198656 }, { "epoch": 1.0, "objective/train/advantage_avg": 0.00032849638955667615, "objective/train/docs_used": 1197549, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.261033296585083, "objective/train/original_loss": 1.2610334157943726, "objective/train/theoretical_loss": 3.3254827539902223, "objective/train/tokens_used": 3316920800, "objective/train/value_avg": -0.0079345703125, "objective/train/value_loss": 0.00026692645042203367, "objective/train/value_max": -2.6285648345947266e-05, "objective/train/value_min": -0.73681640625, "objective/train/value_reward_corr": 0.7701488647382327, "objective/train/value_std": 0.0187225341796875, "objective/train/weight_avg": 1.000449299812317, "objective/train/weighted_lm_loss": 1.2615493535995483, "objective/train/weights_max": 1.6513952016830444, "objective/train/weights_min": 0.36833932995796204, "theoretical_loss": 3.3254827539902223, "tokens_seen": 3296460800 }, { "epoch": 1.0, "learning_rate": 1.0431712405713367e-06, "loss": 0.063, "theoretical_loss": 3.3254827539902223, "tokens_seen": 3296460800 }, { "epoch": 1.0, "learning_rate": 9.629272989889263e-07, "loss": 0.0665, "theoretical_loss": 3.3254629797765984, "tokens_seen": 3296722944 }, { "epoch": 1.0, "learning_rate": 8.826833574065159e-07, "loss": 0.0624, "theoretical_loss": 3.325443207575515, "tokens_seen": 3296985088 }, { "epoch": 1.0, "learning_rate": 8.024394158241052e-07, "loss": 0.0681, "theoretical_loss": 3.3254234373866067, "tokens_seen": 3297247232 }, { "epoch": 1.0, "learning_rate": 7.221954742416948e-07, "loss": 0.0619, "theoretical_loss": 3.325403669209509, "tokens_seen": 3297509376 }, { "epoch": 1.0, "learning_rate": 6.419515326592842e-07, "loss": 0.0643, "theoretical_loss": 3.3253839030438574, "tokens_seen": 3297771520 }, { "epoch": 1.0, "learning_rate": 5.617075910768737e-07, "loss": 0.0637, "theoretical_loss": 3.325364138889287, "tokens_seen": 3298033664 }, { "epoch": 1.0, "learning_rate": 4.814636494944631e-07, "loss": 0.0675, "theoretical_loss": 3.3253443767454343, "tokens_seen": 3298295808 }, { "epoch": 1.0, "learning_rate": 4.012197079120526e-07, "loss": 0.0648, "theoretical_loss": 3.325324616611934, "tokens_seen": 3298557952 }, { "epoch": 1.0, "learning_rate": 3.209757663296421e-07, "loss": 0.0614, "theoretical_loss": 3.325304858488422, "tokens_seen": 3298820096 }, { "epoch": 1.0, "learning_rate": 2.4073182474723157e-07, "loss": 0.0608, "theoretical_loss": 3.3252851023745347, "tokens_seen": 3299082240 }, { "epoch": 1.0, "learning_rate": 1.6048788316482104e-07, "loss": 0.062, "theoretical_loss": 3.325265348269907, "tokens_seen": 3299344384 }, { "epoch": 1.0, "learning_rate": 8.024394158241052e-08, "loss": 0.0618, "theoretical_loss": 3.325245596174176, "tokens_seen": 3299606528 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.0602, "theoretical_loss": 3.325225846086977, "tokens_seen": 3299868672 } ], "max_steps": 12588, "num_train_epochs": 9223372036854775807, "total_flos": 1.6840569843886326e+18, "trial_name": null, "trial_params": null }